larc r21

520 lines · 12.4 KB Raw
1 package convert
2
3 import (
4 "fmt"
5 "io"
6 "log/slog"
7 "os"
8 "path/filepath"
9 "sort"
10 "time"
11
12 "github.com/bytedance/sonic"
13 "github.com/go-git/go-git/v5"
14 "github.com/go-git/go-git/v5/plumbing"
15 "github.com/go-git/go-git/v5/plumbing/object"
16
17 "larc.wejust.rest/larc/internal/core"
18 "larc.wejust.rest/larc/internal/hash"
19 "larc.wejust.rest/larc/internal/repo"
20 )
21
22 /* Git2Larc imports a git repository to larc format.
23 * Converts git commits to sequential revisions with flat tree structure. */
24
// Git2LarcOptions holds the settings for a git-to-larc conversion.
type Git2LarcOptions struct {
	// GitPath is the path to the source git repository.
	GitPath string
	// LarcPath is the path at which the new larc repository is created.
	LarcPath string
	// Verbose switches logging to a debug-level text handler on stderr.
	Verbose bool
}
31
32 // Git2LarcResult contains conversion results
33 type Git2LarcResult struct {
34 CommitsConverted int
35 BlobsConverted int
36 BranchesConverted int
37 Mapping *MappingStore
38 }
39
40 // Git2Larc converts a git repository to larc
41 func Git2Larc(opts Git2LarcOptions) (*Git2LarcResult, error) {
42 log := slog.Default()
43 if opts.Verbose {
44 log = slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}))
45 }
46
47 log.Info("opening git repository", "path", opts.GitPath)
48
49 /* open git repo */
50 gitRepo, err := git.PlainOpen(opts.GitPath)
51 if err != nil {
52 return nil, fmt.Errorf("open git repo: %w", err)
53 }
54
55 /* create larc repo */
56 log.Info("creating larc repository", "path", opts.LarcPath)
57
58 larcRepo, err := repo.Init(opts.LarcPath)
59 if err != nil {
60 return nil, fmt.Errorf("init larc repo: %w", err)
61 }
62 defer larcRepo.Close()
63
64 mapping := NewMappingStore()
65 result := &Git2LarcResult{Mapping: mapping}
66
67 /* get all commits in topological order */
68 commits, err := getCommitsInOrder(gitRepo)
69 if err != nil {
70 return nil, fmt.Errorf("get commits: %w", err)
71 }
72
73 if len(commits) == 0 {
74 log.Info("no commits to convert")
75 return result, nil
76 }
77
78 log.Info("converting commits", "count", len(commits))
79
80 /* process each commit */
81 revNumber := int64(1)
82 for _, commit := range commits {
83 log.Debug("converting commit",
84 "sha", commit.Hash.String()[:8],
85 "message", firstLine(commit.Message))
86
87 blobCount, err := convertCommit(gitRepo, larcRepo, commit, revNumber, mapping)
88 if err != nil {
89 return nil, fmt.Errorf("convert %s: %w", commit.Hash.String()[:8], err)
90 }
91
92 mapping.AddRevisionMapping(revNumber, commit.Hash.String())
93 result.CommitsConverted++
94 result.BlobsConverted += blobCount
95 revNumber++
96 }
97
98 /* convert branches */
99 refs, err := gitRepo.References()
100 if err != nil {
101 return nil, fmt.Errorf("get references: %w", err)
102 }
103
104 err = refs.ForEach(func(ref *plumbing.Reference) error {
105 if !ref.Name().IsBranch() {
106 return nil
107 }
108
109 branchName := ConvertRefToBranch(ref.Name().String())
110 commitSHA := ref.Hash().String()
111
112 log.Debug("converting branch", "name", branchName, "sha", commitSHA[:8])
113
114 if err := convertGitBranch(larcRepo, branchName, commitSHA, mapping); err != nil {
115 log.Warn("failed to convert branch", "name", branchName, "error", err)
116 return nil // continue with other branches
117 }
118
119 result.BranchesConverted++
120 return nil
121 })
122
123 if err != nil {
124 return nil, fmt.Errorf("iterate references: %w", err)
125 }
126
127 /* set current branch and revision */
128 head, err := gitRepo.Head()
129 if err == nil {
130 if head.Name().IsBranch() {
131 branchName := ConvertRefToBranch(head.Name().String())
132 larcRepo.SetCurrentBranch(branchName)
133 }
134 if rev, ok := mapping.GetLarcRev(head.Hash().String()); ok {
135 larcRepo.SetCurrentRevision(rev)
136 }
137 }
138
139 log.Info("conversion complete",
140 "commits", result.CommitsConverted,
141 "blobs", result.BlobsConverted,
142 "branches", result.BranchesConverted)
143
144 return result, nil
145 }
146
147 // getCommitsInOrder returns all commits in topological order (oldest first)
148 func getCommitsInOrder(gitRepo *git.Repository) ([]*object.Commit, error) {
149 /* get all commits */
150 commitIter, err := gitRepo.CommitObjects()
151 if err != nil {
152 return nil, fmt.Errorf("get commit objects: %w", err)
153 }
154
155 /* collect all commits */
156 var allCommits []*object.Commit
157 commitMap := make(map[string]*object.Commit)
158
159 err = commitIter.ForEach(func(c *object.Commit) error {
160 allCommits = append(allCommits, c)
161 commitMap[c.Hash.String()] = c
162 return nil
163 })
164 if err != nil {
165 return nil, fmt.Errorf("iterate commits: %w", err)
166 }
167
168 /* topological sort using Kahn's algorithm */
169 inDegree := make(map[string]int)
170 children := make(map[string][]string)
171
172 for _, c := range allCommits {
173 sha := c.Hash.String()
174 if _, ok := inDegree[sha]; !ok {
175 inDegree[sha] = 0
176 }
177
178 for _, parent := range c.ParentHashes {
179 parentSHA := parent.String()
180 children[parentSHA] = append(children[parentSHA], sha)
181 inDegree[sha]++
182 }
183 }
184
185 /* find roots (commits with no parents in our set) */
186 var queue []string
187 for sha, degree := range inDegree {
188 if degree == 0 {
189 queue = append(queue, sha)
190 }
191 }
192
193 /* sort queue by timestamp for deterministic order */
194 sort.Slice(queue, func(i, j int) bool {
195 ci := commitMap[queue[i]]
196 cj := commitMap[queue[j]]
197 return ci.Committer.When.Before(cj.Committer.When)
198 })
199
200 /* process in order */
201 var sorted []*object.Commit
202 for len(queue) > 0 {
203 sha := queue[0]
204 queue = queue[1:]
205
206 sorted = append(sorted, commitMap[sha])
207
208 /* sort children by timestamp */
209 childList := children[sha]
210 sort.Slice(childList, func(i, j int) bool {
211 ci := commitMap[childList[i]]
212 cj := commitMap[childList[j]]
213 return ci.Committer.When.Before(cj.Committer.When)
214 })
215
216 for _, childSHA := range childList {
217 inDegree[childSHA]--
218 if inDegree[childSHA] == 0 {
219 queue = append(queue, childSHA)
220 }
221 }
222 }
223
224 return sorted, nil
225 }
226
227 // convertCommit converts a single git commit to a larc revision
228 func convertCommit(
229 gitRepo *git.Repository,
230 larcRepo *repo.Repository,
231 commit *object.Commit,
232 revNumber int64,
233 mapping *MappingStore,
234 ) (int, error) {
235 /* get commit tree */
236 tree, err := commit.Tree()
237 if err != nil {
238 return 0, fmt.Errorf("get tree: %w", err)
239 }
240
241 /* convert tree to flat entries */
242 entries, blobCount, err := convertGitTree(gitRepo, larcRepo, tree, "", mapping)
243 if err != nil {
244 return 0, fmt.Errorf("convert tree: %w", err)
245 }
246
247 /* determine parent revision */
248 var parent int64
249 var mergeParent int64
250
251 if commit.NumParents() > 0 {
252 parentHash := commit.ParentHashes[0].String()
253 if rev, ok := mapping.GetLarcRev(parentHash); ok {
254 parent = rev
255 }
256 }
257
258 if commit.NumParents() > 1 {
259 mergeHash := commit.ParentHashes[1].String()
260 if rev, ok := mapping.GetLarcRev(mergeHash); ok {
261 mergeParent = rev
262 }
263 }
264
265 /* determine branch from commit message or use default */
266 branch := core.DefaultBranchName
267 if msgRev, ok := ParseRevisionFromMessage(commit.Message); ok {
268 /* this was originally a larc commit, try to preserve branch */
269 _ = msgRev // could be used for verification
270 }
271
272 /* create larc revision */
273 rev := &core.Revision{
274 Number: revNumber,
275 Timestamp: commit.Committer.When.Unix(),
276 Author: commit.Author.Name,
277 Message: cleanMessage(commit.Message),
278 Branch: branch,
279 Parent: parent,
280 MergeParent: mergeParent,
281 TreeHash: "", // will be computed
282 }
283
284 /* store tree and create revision */
285 larcTree := &core.Tree{Entries: entries}
286 treeData, err := marshalTree(larcTree)
287 if err != nil {
288 return 0, fmt.Errorf("marshal tree: %w", err)
289 }
290
291 larcTree.Hash = hashBytes(treeData)
292 rev.TreeHash = larcTree.Hash
293
294 if err := larcRepo.Meta.StoreTree(larcTree.Hash, treeData); err != nil {
295 return 0, fmt.Errorf("store tree: %w", err)
296 }
297
298 if err := larcRepo.Meta.CreateRevision(rev); err != nil {
299 return 0, fmt.Errorf("create revision: %w", err)
300 }
301
302 return blobCount, nil
303 }
304
305 // convertGitTree recursively converts git tree to flat larc entries
306 func convertGitTree(
307 gitRepo *git.Repository,
308 larcRepo *repo.Repository,
309 tree *object.Tree,
310 prefix string,
311 mapping *MappingStore,
312 ) ([]core.TreeEntry, int, error) {
313 var entries []core.TreeEntry
314 blobCount := 0
315
316 for _, entry := range tree.Entries {
317 path := entry.Name
318 if prefix != "" {
319 path = prefix + "/" + entry.Name
320 }
321
322 if entry.Mode.IsFile() {
323 /* convert blob */
324 larcHash, size, isNew, err := convertGitBlob(gitRepo, larcRepo, entry.Hash.String(), mapping)
325 if err != nil {
326 return nil, 0, fmt.Errorf("convert blob %s: %w", path, err)
327 }
328 if isNew {
329 blobCount++
330 }
331
332 mode, kind := ConvertGitMode(uint32(entry.Mode))
333
334 entries = append(entries, core.TreeEntry{
335 Path: path,
336 Mode: mode,
337 Size: size,
338 BlobHash: larcHash,
339 Kind: kind,
340 })
341 } else if entry.Mode == 0040000 { // directory
342 /* recurse into subtree */
343 subTree, err := gitRepo.TreeObject(entry.Hash)
344 if err != nil {
345 return nil, 0, fmt.Errorf("get subtree %s: %w", path, err)
346 }
347
348 subEntries, subBlobs, err := convertGitTree(gitRepo, larcRepo, subTree, path, mapping)
349 if err != nil {
350 return nil, 0, err
351 }
352
353 entries = append(entries, subEntries...)
354 blobCount += subBlobs
355 }
356 }
357
358 return entries, blobCount, nil
359 }
360
361 // convertGitBlob converts a git blob to larc blob
362 func convertGitBlob(
363 gitRepo *git.Repository,
364 larcRepo *repo.Repository,
365 gitSHA string,
366 mapping *MappingStore,
367 ) (string, int64, bool, error) {
368 /* check reverse mapping (git SHA -> larc hash) */
369 for larcHash, gSHA := range mapping.BlobMap {
370 if gSHA == gitSHA {
371 /* already converted, get size */
372 data, err := larcRepo.Blobs.Read(larcHash)
373 if err == nil {
374 return larcHash, int64(len(data)), false, nil
375 }
376 }
377 }
378
379 /* read git blob */
380 blob, err := gitRepo.BlobObject(plumbing.NewHash(gitSHA))
381 if err != nil {
382 return "", 0, false, fmt.Errorf("get git blob: %w", err)
383 }
384
385 reader, err := blob.Reader()
386 if err != nil {
387 return "", 0, false, fmt.Errorf("blob reader: %w", err)
388 }
389 defer reader.Close()
390
391 data, err := io.ReadAll(reader)
392 if err != nil {
393 return "", 0, false, fmt.Errorf("read blob: %w", err)
394 }
395
396 /* write to larc blob store */
397 larcHash, err := larcRepo.Blobs.Write(data)
398 if err != nil {
399 return "", 0, false, fmt.Errorf("write larc blob: %w", err)
400 }
401
402 mapping.AddBlobMapping(larcHash, gitSHA)
403 return larcHash, int64(len(data)), true, nil
404 }
405
406 // convertGitBranch creates a larc branch from git branch
407 func convertGitBranch(
408 larcRepo *repo.Repository,
409 name string,
410 commitSHA string,
411 mapping *MappingStore,
412 ) error {
413 rev, ok := mapping.GetLarcRev(commitSHA)
414 if !ok {
415 return fmt.Errorf("no revision for commit %s", commitSHA[:8])
416 }
417
418 /* check if branch already exists (created during init) */
419 existing, err := larcRepo.Meta.GetBranch(name)
420 if err == nil {
421 /* update existing branch */
422 return larcRepo.Meta.UpdateBranchHead(name, rev)
423 }
424 _ = existing
425
426 /* create new branch */
427 branch := &core.Branch{
428 Name: name,
429 HeadRev: rev,
430 CreatedAt: time.Now().Unix(),
431 CreatedFrom: rev,
432 }
433
434 return larcRepo.Meta.CreateBranch(branch)
435 }
436
437 // helper functions
438
// firstLine returns s up to, but not including, its first newline character.
// A string without a newline is returned unchanged.
func firstLine(s string) string {
	for pos := 0; pos < len(s); pos++ {
		if s[pos] == '\n' {
			return s[:pos]
		}
	}
	return s
}
447
448 func cleanMessage(msg string) string {
449 /* remove [larc:rN] suffix if present */
450 if rev, ok := ParseRevisionFromMessage(msg); ok {
451 _ = rev
452 idx := len(msg) - 1
453 for idx >= 0 && msg[idx] != '[' {
454 idx--
455 }
456 if idx > 0 {
457 msg = msg[:idx]
458 }
459 }
460
461 /* trim trailing whitespace */
462 for len(msg) > 0 && (msg[len(msg)-1] == '\n' || msg[len(msg)-1] == ' ') {
463 msg = msg[:len(msg)-1]
464 }
465
466 return msg
467 }
468
469 func marshalTree(tree *core.Tree) ([]byte, error) {
470 /* use sonic for JSON marshaling (consistent with repo.go) */
471 return sonic.Marshal(tree)
472 }
473
474 func hashBytes(data []byte) string {
475 /* use xxhash64 like the rest of larc */
476 return hash.Bytes(data)
477 }
478
479 // ImportFromGit is a convenience function for CLI
480 func ImportFromGit(gitPath, larcPath string, verbose bool) error {
481 /* resolve paths */
482 absGit, err := filepath.Abs(gitPath)
483 if err != nil {
484 return fmt.Errorf("resolve git path: %w", err)
485 }
486
487 absLarc, err := filepath.Abs(larcPath)
488 if err != nil {
489 return fmt.Errorf("resolve larc path: %w", err)
490 }
491
492 /* check git repo exists */
493 if _, err := os.Stat(filepath.Join(absGit, ".git")); os.IsNotExist(err) {
494 return fmt.Errorf("not a git repository: %s", absGit)
495 }
496
497 /* check larc path doesn't exist */
498 if _, err := os.Stat(filepath.Join(absLarc, ".larc")); err == nil {
499 return fmt.Errorf("larc repository already exists: %s", absLarc)
500 }
501
502 opts := Git2LarcOptions{
503 GitPath: absGit,
504 LarcPath: absLarc,
505 Verbose: verbose,
506 }
507
508 result, err := Git2Larc(opts)
509 if err != nil {
510 return err
511 }
512
513 fmt.Printf("Imported git repository to larc:\n")
514 fmt.Printf(" Commits: %d\n", result.CommitsConverted)
515 fmt.Printf(" Blobs: %d\n", result.BlobsConverted)
516 fmt.Printf(" Branches: %d\n", result.BranchesConverted)
517
518 return nil
519 }
520