package convert

import (
	"fmt"
	"io"
	"log/slog"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"github.com/bytedance/sonic"
	"github.com/go-git/go-git/v5"
	"github.com/go-git/go-git/v5/plumbing"
	"github.com/go-git/go-git/v5/plumbing/object"

	"larc.wejust.rest/larc/internal/core"
	"larc.wejust.rest/larc/internal/hash"
	"larc.wejust.rest/larc/internal/repo"
)

/* Git2Larc imports a git repository to larc format.
 * Converts git commits to sequential revisions with flat tree structure.
 */

// Git2LarcOptions configures the conversion.
type Git2LarcOptions struct {
	GitPath  string // path to git repository
	LarcPath string // path for new larc repository
	Verbose  bool   // verbose output
}

// Git2LarcResult contains conversion results.
type Git2LarcResult struct {
	CommitsConverted  int
	BlobsConverted    int
	BranchesConverted int
	Mapping           *MappingStore
}

// Git2Larc converts a git repository to larc.
//
// It opens the git repository at opts.GitPath, initialises a new larc
// repository at opts.LarcPath, and converts every commit returned by
// getCommitsInOrder into a revision numbered sequentially from 1. It then
// converts each git branch reference and finally copies HEAD (branch and
// revision) when HEAD can be resolved.
//
// A branch that cannot be converted is logged and skipped; any other failure
// aborts the conversion and is returned wrapped with context. When the git
// repository has no commits, the (empty) result is returned without touching
// branches.
func Git2Larc(opts Git2LarcOptions) (*Git2LarcResult, error) {
	logger := slog.Default()
	if opts.Verbose {
		logger = slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}))
	}

	/* open git repo */
	logger.Info("opening git repository", "path", opts.GitPath)
	gitRepo, err := git.PlainOpen(opts.GitPath)
	if err != nil {
		return nil, fmt.Errorf("open git repo: %w", err)
	}

	/* create larc repo */
	logger.Info("creating larc repository", "path", opts.LarcPath)
	larcRepo, err := repo.Init(opts.LarcPath)
	if err != nil {
		return nil, fmt.Errorf("init larc repo: %w", err)
	}
	defer larcRepo.Close()

	mapping := NewMappingStore()
	result := &Git2LarcResult{Mapping: mapping}

	/* get all commits in topological order */
	commits, err := getCommitsInOrder(gitRepo)
	if err != nil {
		return nil, fmt.Errorf("get commits: %w", err)
	}
	if len(commits) == 0 {
		logger.Info("no commits to convert")
		return result, nil
	}
	logger.Info("converting commits", "count", len(commits))

	/* process each commit; parents always precede children, so parent
	 * revisions are already in the mapping when a child is converted */
	revNumber := int64(1)
	for _, commit := range commits {
		fullSHA := commit.Hash.String()
		shortSHA := fullSHA[:8]
		logger.Debug("converting commit", "sha", shortSHA, "message", firstLine(commit.Message))

		blobCount, err := convertCommit(gitRepo, larcRepo, commit, revNumber, mapping)
		if err != nil {
			return nil, fmt.Errorf("convert %s: %w", shortSHA, err)
		}

		mapping.AddRevisionMapping(revNumber, fullSHA)
		result.CommitsConverted++
		result.BlobsConverted += blobCount
		revNumber++
	}

	/* convert branches */
	refs, err := gitRepo.References()
	if err != nil {
		return nil, fmt.Errorf("get references: %w", err)
	}
	err = refs.ForEach(func(ref *plumbing.Reference) error {
		if !ref.Name().IsBranch() {
			return nil
		}
		branchName := ConvertRefToBranch(ref.Name().String())
		commitSHA := ref.Hash().String()
		logger.Debug("converting branch", "name", branchName, "sha", commitSHA[:8])

		if err := convertGitBranch(larcRepo, branchName, commitSHA, mapping); err != nil {
			/* best effort: report and continue with other branches */
			logger.Warn("failed to convert branch", "name", branchName, "error", err)
			return nil
		}
		result.BranchesConverted++
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("iterate references: %w", err)
	}

	/* set current branch and revision; an unresolvable HEAD is not an error.
	 * NOTE(review): any values returned by SetCurrentBranch /
	 * SetCurrentRevision are not inspected here - confirm that is intended. */
	if head, err := gitRepo.Head(); err == nil {
		if head.Name().IsBranch() {
			larcRepo.SetCurrentBranch(ConvertRefToBranch(head.Name().String()))
		}
		if rev, ok := mapping.GetLarcRev(head.Hash().String()); ok {
			larcRepo.SetCurrentRevision(rev)
		}
	}

	logger.Info("conversion complete",
		"commits", result.CommitsConverted,
		"blobs", result.BlobsConverted,
		"branches", result.BranchesConverted)
	return result, nil
}

// getCommitsInOrder returns all commits in topological order (oldest first).
//
// Every commit object in the repository is collected and ordered with Kahn's
// algorithm so that a commit is always emitted after all of its parents that
// exist in the repository. Commits that become ready together are ordered by
// committer timestamp, with the SHA as a tie-break so the result does not
// depend on map iteration order.
//
// Parents that are referenced but not present among the collected commits
// (for example in a shallow clone) are ignored when computing the ordering;
// the referencing commit is then treated as a root instead of being dropped.
func getCommitsInOrder(gitRepo *git.Repository) ([]*object.Commit, error) {
	/* get all commits */
	commitIter, err := gitRepo.CommitObjects()
	if err != nil {
		return nil, fmt.Errorf("get commit objects: %w", err)
	}

	/* collect all commits */
	var allCommits []*object.Commit
	commitMap := make(map[string]*object.Commit)
	err = commitIter.ForEach(func(c *object.Commit) error {
		allCommits = append(allCommits, c)
		commitMap[c.Hash.String()] = c
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("iterate commits: %w", err)
	}

	/* topological sort using Kahn's algorithm */
	inDegree := make(map[string]int, len(commitMap))
	children := make(map[string][]string)
	for _, c := range allCommits {
		sha := c.Hash.String()
		if _, ok := inDegree[sha]; !ok {
			inDegree[sha] = 0
		}
		for _, parent := range c.ParentHashes {
			parentSHA := parent.String()
			if _, known := commitMap[parentSHA]; !known {
				/* parent object is not in our set; counting it would keep
				 * this commit (and its descendants) out of the result */
				continue
			}
			children[parentSHA] = append(children[parentSHA], sha)
			inDegree[sha]++
		}
	}

	/* older reports whether commit a should be emitted before commit b */
	older := func(a, b string) bool {
		wa, wb := commitMap[a].Committer.When, commitMap[b].Committer.When
		if !wa.Equal(wb) {
			return wa.Before(wb)
		}
		return a < b
	}

	/* find roots (commits with no parents in our set) */
	var queue []string
	for sha, degree := range inDegree {
		if degree == 0 {
			queue = append(queue, sha)
		}
	}
	sort.Slice(queue, func(i, j int) bool { return older(queue[i], queue[j]) })

	/* process in order */
	sorted := make([]*object.Commit, 0, len(inDegree))
	for len(queue) > 0 {
		sha := queue[0]
		queue = queue[1:]
		sorted = append(sorted, commitMap[sha])

		/* release children, oldest first */
		childList := children[sha]
		sort.Slice(childList, func(i, j int) bool { return older(childList[i], childList[j]) })
		for _, childSHA := range childList {
			inDegree[childSHA]--
			if inDegree[childSHA] == 0 {
				queue = append(queue, childSHA)
			}
		}
	}

	/* every distinct commit must have been emitted; anything else means the
	 * parent graph could not be ordered and revisions would go missing */
	if len(sorted) != len(inDegree) {
		return nil, fmt.Errorf("order commits: sorted %d of %d commits", len(sorted), len(inDegree))
	}
	return sorted, nil
}

// convertCommit converts a single git commit to a larc revision.
//
// The commit's tree is flattened and its blobs are written through
// convertGitTree, the flattened tree is serialised, hashed and stored, and a
// revision numbered revNumber is created that points at it. The first git
// parent becomes Parent and the second becomes MergeParent when they have
// already been mapped; unmapped parents leave the field at zero. Parents
// beyond the second are not recorded. Every revision is placed on
// core.DefaultBranchName.
//
// It returns the number of blobs newly written for this commit.
func convertCommit(
	gitRepo *git.Repository,
	larcRepo *repo.Repository,
	commit *object.Commit,
	revNumber int64,
	mapping *MappingStore,
) (int, error) {
	/* get commit tree */
	tree, err := commit.Tree()
	if err != nil {
		return 0, fmt.Errorf("get tree: %w", err)
	}

	/* convert tree to flat entries */
	entries, blobCount, err := convertGitTree(gitRepo, larcRepo, tree, "", mapping)
	if err != nil {
		return 0, fmt.Errorf("convert tree: %w", err)
	}

	/* determine parent revisions */
	var parent, mergeParent int64
	if commit.NumParents() > 0 {
		if rev, ok := mapping.GetLarcRev(commit.ParentHashes[0].String()); ok {
			parent = rev
		}
	}
	if commit.NumParents() > 1 {
		if rev, ok := mapping.GetLarcRev(commit.ParentHashes[1].String()); ok {
			mergeParent = rev
		}
	}

	/* store tree */
	larcTree := &core.Tree{Entries: entries}
	treeData, err := marshalTree(larcTree)
	if err != nil {
		return 0, fmt.Errorf("marshal tree: %w", err)
	}
	larcTree.Hash = hashBytes(treeData)
	if err := larcRepo.Meta.StoreTree(larcTree.Hash, treeData); err != nil {
		return 0, fmt.Errorf("store tree: %w", err)
	}

	/* create larc revision */
	rev := &core.Revision{
		Number:      revNumber,
		Timestamp:   commit.Committer.When.Unix(),
		Author:      commit.Author.Name,
		Message:     cleanMessage(commit.Message),
		Branch:      core.DefaultBranchName,
		Parent:      parent,
		MergeParent: mergeParent,
		TreeHash:    larcTree.Hash,
	}
	if err := larcRepo.Meta.CreateRevision(rev); err != nil {
		return 0, fmt.Errorf("create revision: %w", err)
	}
	return blobCount, nil
}

// convertGitTree recursively converts git tree to flat larc entries.
//
// prefix is the slash-separated path of tree within the commit ("" for the
// root). File entries are converted with convertGitBlob and appended with
// their full path; directory entries are descended into. Entries that are
// neither are skipped without error.
//
// It returns the flattened entries and the number of blobs newly written.
func convertGitTree(
	gitRepo *git.Repository,
	larcRepo *repo.Repository,
	tree *object.Tree,
	prefix string,
	mapping *MappingStore,
) ([]core.TreeEntry, int, error) {
	/* git mode bits of a directory (sub-tree) entry */
	const gitDirMode = 0o40000

	var entries []core.TreeEntry
	blobCount := 0

	for _, entry := range tree.Entries {
		/* git paths always use '/', independent of the host OS */
		path := entry.Name
		if prefix != "" {
			path = prefix + "/" + entry.Name
		}

		switch {
		case entry.Mode.IsFile():
			/* convert blob */
			larcHash, size, isNew, err := convertGitBlob(gitRepo, larcRepo, entry.Hash.String(), mapping)
			if err != nil {
				return nil, 0, fmt.Errorf("convert blob %s: %w", path, err)
			}
			if isNew {
				blobCount++
			}
			mode, kind := ConvertGitMode(uint32(entry.Mode))
			entries = append(entries, core.TreeEntry{
				Path:     path,
				Mode:     mode,
				Size:     size,
				BlobHash: larcHash,
				Kind:     kind,
			})

		case entry.Mode == gitDirMode:
			/* recurse into subtree */
			subTree, err := gitRepo.TreeObject(entry.Hash)
			if err != nil {
				return nil, 0, fmt.Errorf("get subtree %s: %w", path, err)
			}
			subEntries, subBlobs, err := convertGitTree(gitRepo, larcRepo, subTree, path, mapping)
			if err != nil {
				return nil, 0, err
			}
			entries = append(entries, subEntries...)
			blobCount += subBlobs
		}
	}
	return entries, blobCount, nil
}

// convertGitBlob converts a git blob to larc blob.
//
// If gitSHA is already recorded in mapping.BlobMap and the mapped larc blob
// can be read, the existing larc hash and its size are returned with
// isNew == false. Otherwise the git blob is read in full, written to the larc
// blob store, recorded in the mapping and returned with isNew == true.
//
// Returns (larc hash, size in bytes, isNew, error).
func convertGitBlob(
	gitRepo *git.Repository,
	larcRepo *repo.Repository,
	gitSHA string,
	mapping *MappingStore,
) (string, int64, bool, error) {
	/* check reverse mapping (git SHA -> larc hash).
	 * NOTE(review): this scans the whole map and re-reads the blob just to
	 * learn its size, once per tree entry per commit; a git SHA -> larc hash
	 * index on MappingStore would avoid both - confirm against MappingStore. */
	for larcHash, gSHA := range mapping.BlobMap {
		if gSHA != gitSHA {
			continue
		}
		/* already converted, get size; on a read failure fall through and
		 * convert the blob again */
		if data, err := larcRepo.Blobs.Read(larcHash); err == nil {
			return larcHash, int64(len(data)), false, nil
		}
	}

	/* read git blob */
	blob, err := gitRepo.BlobObject(plumbing.NewHash(gitSHA))
	if err != nil {
		return "", 0, false, fmt.Errorf("get git blob: %w", err)
	}
	reader, err := blob.Reader()
	if err != nil {
		return "", 0, false, fmt.Errorf("blob reader: %w", err)
	}
	defer reader.Close()

	data, err := io.ReadAll(reader)
	if err != nil {
		return "", 0, false, fmt.Errorf("read blob: %w", err)
	}

	/* write to larc blob store */
	larcHash, err := larcRepo.Blobs.Write(data)
	if err != nil {
		return "", 0, false, fmt.Errorf("write larc blob: %w", err)
	}
	mapping.AddBlobMapping(larcHash, gitSHA)
	return larcHash, int64(len(data)), true, nil
}

// convertGitBranch creates a larc branch from git branch.
//
// commitSHA must already be mapped to a revision, otherwise an error is
// returned. If a branch called name can be fetched its head is moved to that
// revision; otherwise a new branch is created with the revision as both its
// head and its origin.
func convertGitBranch(
	larcRepo *repo.Repository,
	name string,
	commitSHA string,
	mapping *MappingStore,
) error {
	rev, ok := mapping.GetLarcRev(commitSHA)
	if !ok {
		return fmt.Errorf("no revision for commit %s", commitSHA[:8])
	}

	/* check if branch already exists (created during init); any lookup error
	 * is treated as "does not exist" */
	if _, err := larcRepo.Meta.GetBranch(name); err == nil {
		/* update existing branch */
		return larcRepo.Meta.UpdateBranchHead(name, rev)
	}

	/* create new branch */
	return larcRepo.Meta.CreateBranch(&core.Branch{
		Name:        name,
		HeadRev:     rev,
		CreatedAt:   time.Now().Unix(),
		CreatedFrom: rev,
	})
}

// helper functions

// firstLine returns s up to, but not including, its first newline, or all of
// s when it contains none.
func firstLine(s string) string {
	if i := strings.IndexByte(s, '\n'); i >= 0 {
		return s[:i]
	}
	return s
}

// cleanMessage prepares a git commit message for storage in a larc revision.
//
// When ParseRevisionFromMessage recognises the message, everything from the
// last '[' onwards is cut off (the [larc:rN] suffix); a '[' at position 0 is
// left alone so the message never becomes empty through this step. Trailing
// spaces, tabs, carriage returns and newlines are then removed.
func cleanMessage(msg string) string {
	/* remove [larc:rN] suffix if present */
	if _, ok := ParseRevisionFromMessage(msg); ok {
		if idx := strings.LastIndexByte(msg, '['); idx > 0 {
			msg = msg[:idx]
		}
	}

	/* trim trailing whitespace, including the '\r' of CRLF messages */
	return strings.TrimRight(msg, " \t\r\n")
}

// marshalTree serialises tree to JSON.
func marshalTree(tree *core.Tree) ([]byte, error) {
	/* use sonic for JSON marshaling (consistent with repo.go) */
	return sonic.Marshal(tree)
}

// hashBytes returns the larc content hash of data, as computed by hash.Bytes.
func hashBytes(data []byte) string {
	/* use xxhash64 like the rest of larc */
	return hash.Bytes(data)
}

// ImportFromGit is a convenience function for CLI.
//
// It resolves both paths to absolute form, requires a ".git" entry under
// gitPath, refuses to run when a ".larc" entry already exists under larcPath,
// runs Git2Larc and prints a short summary to standard output.
func ImportFromGit(gitPath, larcPath string, verbose bool) error {
	/* resolve paths */
	absGit, err := filepath.Abs(gitPath)
	if err != nil {
		return fmt.Errorf("resolve git path: %w", err)
	}
	absLarc, err := filepath.Abs(larcPath)
	if err != nil {
		return fmt.Errorf("resolve larc path: %w", err)
	}

	/* check git repo exists */
	if _, err := os.Stat(filepath.Join(absGit, ".git")); os.IsNotExist(err) {
		return fmt.Errorf("not a git repository: %s", absGit)
	}

	/* check larc path doesn't exist */
	if _, err := os.Stat(filepath.Join(absLarc, ".larc")); err == nil {
		return fmt.Errorf("larc repository already exists: %s", absLarc)
	}

	result, err := Git2Larc(Git2LarcOptions{
		GitPath:  absGit,
		LarcPath: absLarc,
		Verbose:  verbose,
	})
	if err != nil {
		return err
	}

	fmt.Print("Imported git repository to larc:\n")
	fmt.Printf(" Commits: %d\n", result.CommitsConverted)
	fmt.Printf(" Blobs: %d\n", result.BlobsConverted)
	fmt.Printf(" Branches: %d\n", result.BranchesConverted)
	return nil
}