| 1 |
package convert |
| 2 |
|
| 3 |
import ( |
| 4 |
"fmt" |
| 5 |
"io" |
| 6 |
"log/slog" |
| 7 |
"os" |
| 8 |
"path/filepath" |
| 9 |
"sort" |
| 10 |
"time" |
| 11 |
|
| 12 |
"github.com/bytedance/sonic" |
| 13 |
"github.com/go-git/go-git/v5" |
| 14 |
"github.com/go-git/go-git/v5/plumbing" |
| 15 |
"github.com/go-git/go-git/v5/plumbing/object" |
| 16 |
|
| 17 |
"github.com/lain/larc/internal/core" |
| 18 |
"github.com/lain/larc/internal/hash" |
| 19 |
"github.com/lain/larc/internal/repo" |
| 20 |
) |
| 21 |
|
| 22 |
/* Git2Larc imports a git repository into larc format.
 * Git commits are converted to sequential revisions with a flat tree structure. */
| 24 |
|
| 25 |
// Git2LarcOptions holds the settings for a git-to-larc conversion.
type Git2LarcOptions struct {
	// GitPath is the path to the source git repository.
	GitPath string
	// LarcPath is the path at which the new larc repository is created.
	LarcPath string
	// Verbose enables verbose (debug-level) output during the conversion.
	Verbose bool
}
| 31 |
|
| 32 |
// Git2LarcResult contains conversion results |
| 33 |
type Git2LarcResult struct { |
| 34 |
CommitsConverted int |
| 35 |
BlobsConverted int |
| 36 |
BranchesConverted int |
| 37 |
Mapping *MappingStore |
| 38 |
} |
| 39 |
|
| 40 |
// Git2Larc converts a git repository to larc |
| 41 |
func Git2Larc(opts Git2LarcOptions) (*Git2LarcResult, error) { |
| 42 |
log := slog.Default() |
| 43 |
if opts.Verbose { |
| 44 |
log = slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug})) |
| 45 |
} |
| 46 |
|
| 47 |
log.Info("opening git repository", "path", opts.GitPath) |
| 48 |
|
| 49 |
/* open git repo */ |
| 50 |
gitRepo, err := git.PlainOpen(opts.GitPath) |
| 51 |
if err != nil { |
| 52 |
return nil, fmt.Errorf("open git repo: %w", err) |
| 53 |
} |
| 54 |
|
| 55 |
/* create larc repo */ |
| 56 |
log.Info("creating larc repository", "path", opts.LarcPath) |
| 57 |
|
| 58 |
larcRepo, err := repo.Init(opts.LarcPath) |
| 59 |
if err != nil { |
| 60 |
return nil, fmt.Errorf("init larc repo: %w", err) |
| 61 |
} |
| 62 |
defer larcRepo.Close() |
| 63 |
|
| 64 |
mapping := NewMappingStore() |
| 65 |
result := &Git2LarcResult{Mapping: mapping} |
| 66 |
|
| 67 |
/* get all commits in topological order */ |
| 68 |
commits, err := getCommitsInOrder(gitRepo) |
| 69 |
if err != nil { |
| 70 |
return nil, fmt.Errorf("get commits: %w", err) |
| 71 |
} |
| 72 |
|
| 73 |
if len(commits) == 0 { |
| 74 |
log.Info("no commits to convert") |
| 75 |
return result, nil |
| 76 |
} |
| 77 |
|
| 78 |
log.Info("converting commits", "count", len(commits)) |
| 79 |
|
| 80 |
/* process each commit */ |
| 81 |
revNumber := int64(1) |
| 82 |
for _, commit := range commits { |
| 83 |
log.Debug("converting commit", |
| 84 |
"sha", commit.Hash.String()[:8], |
| 85 |
"message", firstLine(commit.Message)) |
| 86 |
|
| 87 |
blobCount, err := convertCommit(gitRepo, larcRepo, commit, revNumber, mapping) |
| 88 |
if err != nil { |
| 89 |
return nil, fmt.Errorf("convert %s: %w", commit.Hash.String()[:8], err) |
| 90 |
} |
| 91 |
|
| 92 |
mapping.AddRevisionMapping(revNumber, commit.Hash.String()) |
| 93 |
result.CommitsConverted++ |
| 94 |
result.BlobsConverted += blobCount |
| 95 |
revNumber++ |
| 96 |
} |
| 97 |
|
| 98 |
/* convert branches */ |
| 99 |
refs, err := gitRepo.References() |
| 100 |
if err != nil { |
| 101 |
return nil, fmt.Errorf("get references: %w", err) |
| 102 |
} |
| 103 |
|
| 104 |
err = refs.ForEach(func(ref *plumbing.Reference) error { |
| 105 |
if !ref.Name().IsBranch() { |
| 106 |
return nil |
| 107 |
} |
| 108 |
|
| 109 |
branchName := ConvertRefToBranch(ref.Name().String()) |
| 110 |
commitSHA := ref.Hash().String() |
| 111 |
|
| 112 |
log.Debug("converting branch", "name", branchName, "sha", commitSHA[:8]) |
| 113 |
|
| 114 |
if err := convertGitBranch(larcRepo, branchName, commitSHA, mapping); err != nil { |
| 115 |
log.Warn("failed to convert branch", "name", branchName, "error", err) |
| 116 |
return nil // continue with other branches |
| 117 |
} |
| 118 |
|
| 119 |
result.BranchesConverted++ |
| 120 |
return nil |
| 121 |
}) |
| 122 |
|
| 123 |
if err != nil { |
| 124 |
return nil, fmt.Errorf("iterate references: %w", err) |
| 125 |
} |
| 126 |
|
| 127 |
/* set current branch and revision */ |
| 128 |
head, err := gitRepo.Head() |
| 129 |
if err == nil { |
| 130 |
if head.Name().IsBranch() { |
| 131 |
branchName := ConvertRefToBranch(head.Name().String()) |
| 132 |
larcRepo.SetCurrentBranch(branchName) |
| 133 |
} |
| 134 |
if rev, ok := mapping.GetLarcRev(head.Hash().String()); ok { |
| 135 |
larcRepo.SetCurrentRevision(rev) |
| 136 |
} |
| 137 |
} |
| 138 |
|
| 139 |
log.Info("conversion complete", |
| 140 |
"commits", result.CommitsConverted, |
| 141 |
"blobs", result.BlobsConverted, |
| 142 |
"branches", result.BranchesConverted) |
| 143 |
|
| 144 |
return result, nil |
| 145 |
} |
| 146 |
|
| 147 |
// getCommitsInOrder returns all commits in topological order (oldest first) |
| 148 |
func getCommitsInOrder(gitRepo *git.Repository) ([]*object.Commit, error) { |
| 149 |
/* get all commits */ |
| 150 |
commitIter, err := gitRepo.CommitObjects() |
| 151 |
if err != nil { |
| 152 |
return nil, fmt.Errorf("get commit objects: %w", err) |
| 153 |
} |
| 154 |
|
| 155 |
/* collect all commits */ |
| 156 |
var allCommits []*object.Commit |
| 157 |
commitMap := make(map[string]*object.Commit) |
| 158 |
|
| 159 |
err = commitIter.ForEach(func(c *object.Commit) error { |
| 160 |
allCommits = append(allCommits, c) |
| 161 |
commitMap[c.Hash.String()] = c |
| 162 |
return nil |
| 163 |
}) |
| 164 |
if err != nil { |
| 165 |
return nil, fmt.Errorf("iterate commits: %w", err) |
| 166 |
} |
| 167 |
|
| 168 |
/* topological sort using Kahn's algorithm */ |
| 169 |
inDegree := make(map[string]int) |
| 170 |
children := make(map[string][]string) |
| 171 |
|
| 172 |
for _, c := range allCommits { |
| 173 |
sha := c.Hash.String() |
| 174 |
if _, ok := inDegree[sha]; !ok { |
| 175 |
inDegree[sha] = 0 |
| 176 |
} |
| 177 |
|
| 178 |
for _, parent := range c.ParentHashes { |
| 179 |
parentSHA := parent.String() |
| 180 |
children[parentSHA] = append(children[parentSHA], sha) |
| 181 |
inDegree[sha]++ |
| 182 |
} |
| 183 |
} |
| 184 |
|
| 185 |
/* find roots (commits with no parents in our set) */ |
| 186 |
var queue []string |
| 187 |
for sha, degree := range inDegree { |
| 188 |
if degree == 0 { |
| 189 |
queue = append(queue, sha) |
| 190 |
} |
| 191 |
} |
| 192 |
|
| 193 |
/* sort queue by timestamp for deterministic order */ |
| 194 |
sort.Slice(queue, func(i, j int) bool { |
| 195 |
ci := commitMap[queue[i]] |
| 196 |
cj := commitMap[queue[j]] |
| 197 |
return ci.Committer.When.Before(cj.Committer.When) |
| 198 |
}) |
| 199 |
|
| 200 |
/* process in order */ |
| 201 |
var sorted []*object.Commit |
| 202 |
for len(queue) > 0 { |
| 203 |
sha := queue[0] |
| 204 |
queue = queue[1:] |
| 205 |
|
| 206 |
sorted = append(sorted, commitMap[sha]) |
| 207 |
|
| 208 |
/* sort children by timestamp */ |
| 209 |
childList := children[sha] |
| 210 |
sort.Slice(childList, func(i, j int) bool { |
| 211 |
ci := commitMap[childList[i]] |
| 212 |
cj := commitMap[childList[j]] |
| 213 |
return ci.Committer.When.Before(cj.Committer.When) |
| 214 |
}) |
| 215 |
|
| 216 |
for _, childSHA := range childList { |
| 217 |
inDegree[childSHA]-- |
| 218 |
if inDegree[childSHA] == 0 { |
| 219 |
queue = append(queue, childSHA) |
| 220 |
} |
| 221 |
} |
| 222 |
} |
| 223 |
|
| 224 |
return sorted, nil |
| 225 |
} |
| 226 |
|
| 227 |
// convertCommit converts a single git commit to a larc revision |
| 228 |
func convertCommit( |
| 229 |
gitRepo *git.Repository, |
| 230 |
larcRepo *repo.Repository, |
| 231 |
commit *object.Commit, |
| 232 |
revNumber int64, |
| 233 |
mapping *MappingStore, |
| 234 |
) (int, error) { |
| 235 |
/* get commit tree */ |
| 236 |
tree, err := commit.Tree() |
| 237 |
if err != nil { |
| 238 |
return 0, fmt.Errorf("get tree: %w", err) |
| 239 |
} |
| 240 |
|
| 241 |
/* convert tree to flat entries */ |
| 242 |
entries, blobCount, err := convertGitTree(gitRepo, larcRepo, tree, "", mapping) |
| 243 |
if err != nil { |
| 244 |
return 0, fmt.Errorf("convert tree: %w", err) |
| 245 |
} |
| 246 |
|
| 247 |
/* determine parent revision */ |
| 248 |
var parent int64 |
| 249 |
var mergeParent int64 |
| 250 |
|
| 251 |
if commit.NumParents() > 0 { |
| 252 |
parentHash := commit.ParentHashes[0].String() |
| 253 |
if rev, ok := mapping.GetLarcRev(parentHash); ok { |
| 254 |
parent = rev |
| 255 |
} |
| 256 |
} |
| 257 |
|
| 258 |
if commit.NumParents() > 1 { |
| 259 |
mergeHash := commit.ParentHashes[1].String() |
| 260 |
if rev, ok := mapping.GetLarcRev(mergeHash); ok { |
| 261 |
mergeParent = rev |
| 262 |
} |
| 263 |
} |
| 264 |
|
| 265 |
/* determine branch from commit message or use default */ |
| 266 |
branch := core.DefaultBranchName |
| 267 |
if msgRev, ok := ParseRevisionFromMessage(commit.Message); ok { |
| 268 |
/* this was originally a larc commit, try to preserve branch */ |
| 269 |
_ = msgRev // could be used for verification |
| 270 |
} |
| 271 |
|
| 272 |
/* create larc revision */ |
| 273 |
rev := &core.Revision{ |
| 274 |
Number: revNumber, |
| 275 |
Timestamp: commit.Committer.When.Unix(), |
| 276 |
Author: commit.Author.Name, |
| 277 |
Message: cleanMessage(commit.Message), |
| 278 |
Branch: branch, |
| 279 |
Parent: parent, |
| 280 |
MergeParent: mergeParent, |
| 281 |
TreeHash: "", // will be computed |
| 282 |
} |
| 283 |
|
| 284 |
/* store tree and create revision */ |
| 285 |
larcTree := &core.Tree{Entries: entries} |
| 286 |
treeData, err := marshalTree(larcTree) |
| 287 |
if err != nil { |
| 288 |
return 0, fmt.Errorf("marshal tree: %w", err) |
| 289 |
} |
| 290 |
|
| 291 |
larcTree.Hash = hashBytes(treeData) |
| 292 |
rev.TreeHash = larcTree.Hash |
| 293 |
|
| 294 |
if err := larcRepo.Meta.StoreTree(larcTree.Hash, treeData); err != nil { |
| 295 |
return 0, fmt.Errorf("store tree: %w", err) |
| 296 |
} |
| 297 |
|
| 298 |
if err := larcRepo.Meta.CreateRevision(rev); err != nil { |
| 299 |
return 0, fmt.Errorf("create revision: %w", err) |
| 300 |
} |
| 301 |
|
| 302 |
return blobCount, nil |
| 303 |
} |
| 304 |
|
| 305 |
// convertGitTree recursively converts git tree to flat larc entries |
| 306 |
func convertGitTree( |
| 307 |
gitRepo *git.Repository, |
| 308 |
larcRepo *repo.Repository, |
| 309 |
tree *object.Tree, |
| 310 |
prefix string, |
| 311 |
mapping *MappingStore, |
| 312 |
) ([]core.TreeEntry, int, error) { |
| 313 |
var entries []core.TreeEntry |
| 314 |
blobCount := 0 |
| 315 |
|
| 316 |
for _, entry := range tree.Entries { |
| 317 |
path := entry.Name |
| 318 |
if prefix != "" { |
| 319 |
path = prefix + "/" + entry.Name |
| 320 |
} |
| 321 |
|
| 322 |
if entry.Mode.IsFile() { |
| 323 |
/* convert blob */ |
| 324 |
larcHash, size, isNew, err := convertGitBlob(gitRepo, larcRepo, entry.Hash.String(), mapping) |
| 325 |
if err != nil { |
| 326 |
return nil, 0, fmt.Errorf("convert blob %s: %w", path, err) |
| 327 |
} |
| 328 |
if isNew { |
| 329 |
blobCount++ |
| 330 |
} |
| 331 |
|
| 332 |
mode, kind := ConvertGitMode(uint32(entry.Mode)) |
| 333 |
|
| 334 |
entries = append(entries, core.TreeEntry{ |
| 335 |
Path: path, |
| 336 |
Mode: mode, |
| 337 |
Size: size, |
| 338 |
BlobHash: larcHash, |
| 339 |
Kind: kind, |
| 340 |
}) |
| 341 |
} else if entry.Mode == 0040000 { // directory |
| 342 |
/* recurse into subtree */ |
| 343 |
subTree, err := gitRepo.TreeObject(entry.Hash) |
| 344 |
if err != nil { |
| 345 |
return nil, 0, fmt.Errorf("get subtree %s: %w", path, err) |
| 346 |
} |
| 347 |
|
| 348 |
subEntries, subBlobs, err := convertGitTree(gitRepo, larcRepo, subTree, path, mapping) |
| 349 |
if err != nil { |
| 350 |
return nil, 0, err |
| 351 |
} |
| 352 |
|
| 353 |
entries = append(entries, subEntries...) |
| 354 |
blobCount += subBlobs |
| 355 |
} |
| 356 |
} |
| 357 |
|
| 358 |
return entries, blobCount, nil |
| 359 |
} |
| 360 |
|
| 361 |
// convertGitBlob converts a git blob to larc blob |
| 362 |
func convertGitBlob( |
| 363 |
gitRepo *git.Repository, |
| 364 |
larcRepo *repo.Repository, |
| 365 |
gitSHA string, |
| 366 |
mapping *MappingStore, |
| 367 |
) (string, int64, bool, error) { |
| 368 |
/* check reverse mapping (git SHA -> larc hash) */ |
| 369 |
for larcHash, gSHA := range mapping.blobMap { |
| 370 |
if gSHA == gitSHA { |
| 371 |
/* already converted, get size */ |
| 372 |
data, err := larcRepo.Blobs.Read(larcHash) |
| 373 |
if err == nil { |
| 374 |
return larcHash, int64(len(data)), false, nil |
| 375 |
} |
| 376 |
} |
| 377 |
} |
| 378 |
|
| 379 |
/* read git blob */ |
| 380 |
blob, err := gitRepo.BlobObject(plumbing.NewHash(gitSHA)) |
| 381 |
if err != nil { |
| 382 |
return "", 0, false, fmt.Errorf("get git blob: %w", err) |
| 383 |
} |
| 384 |
|
| 385 |
reader, err := blob.Reader() |
| 386 |
if err != nil { |
| 387 |
return "", 0, false, fmt.Errorf("blob reader: %w", err) |
| 388 |
} |
| 389 |
defer reader.Close() |
| 390 |
|
| 391 |
data, err := io.ReadAll(reader) |
| 392 |
if err != nil { |
| 393 |
return "", 0, false, fmt.Errorf("read blob: %w", err) |
| 394 |
} |
| 395 |
|
| 396 |
/* write to larc blob store */ |
| 397 |
larcHash, err := larcRepo.Blobs.Write(data) |
| 398 |
if err != nil { |
| 399 |
return "", 0, false, fmt.Errorf("write larc blob: %w", err) |
| 400 |
} |
| 401 |
|
| 402 |
mapping.AddBlobMapping(larcHash, gitSHA) |
| 403 |
return larcHash, int64(len(data)), true, nil |
| 404 |
} |
| 405 |
|
| 406 |
// convertGitBranch creates a larc branch from git branch |
| 407 |
func convertGitBranch( |
| 408 |
larcRepo *repo.Repository, |
| 409 |
name string, |
| 410 |
commitSHA string, |
| 411 |
mapping *MappingStore, |
| 412 |
) error { |
| 413 |
rev, ok := mapping.GetLarcRev(commitSHA) |
| 414 |
if !ok { |
| 415 |
return fmt.Errorf("no revision for commit %s", commitSHA[:8]) |
| 416 |
} |
| 417 |
|
| 418 |
/* check if branch already exists (created during init) */ |
| 419 |
existing, err := larcRepo.Meta.GetBranch(name) |
| 420 |
if err == nil { |
| 421 |
/* update existing branch */ |
| 422 |
return larcRepo.Meta.UpdateBranchHead(name, rev) |
| 423 |
} |
| 424 |
_ = existing |
| 425 |
|
| 426 |
/* create new branch */ |
| 427 |
branch := &core.Branch{ |
| 428 |
Name: name, |
| 429 |
HeadRev: rev, |
| 430 |
CreatedAt: time.Now().Unix(), |
| 431 |
CreatedFrom: rev, |
| 432 |
} |
| 433 |
|
| 434 |
return larcRepo.Meta.CreateBranch(branch) |
| 435 |
} |
| 436 |
|
| 437 |
// helper functions |
| 438 |
|
| 439 |
// firstLine returns s up to, but not including, its first newline. When s
// contains no newline it is returned unchanged.
func firstLine(s string) string {
	// '\n' is a single ASCII byte, so a byte-wise scan is sufficient.
	for i := 0; i < len(s); i++ {
		if s[i] == '\n' {
			return s[:i]
		}
	}
	return s
}
| 447 |
|
| 448 |
func cleanMessage(msg string) string { |
| 449 |
/* remove [larc:rN] suffix if present */ |
| 450 |
if rev, ok := ParseRevisionFromMessage(msg); ok { |
| 451 |
_ = rev |
| 452 |
idx := len(msg) - 1 |
| 453 |
for idx >= 0 && msg[idx] != '[' { |
| 454 |
idx-- |
| 455 |
} |
| 456 |
if idx > 0 { |
| 457 |
msg = msg[:idx] |
| 458 |
} |
| 459 |
} |
| 460 |
|
| 461 |
/* trim trailing whitespace */ |
| 462 |
for len(msg) > 0 && (msg[len(msg)-1] == '\n' || msg[len(msg)-1] == ' ') { |
| 463 |
msg = msg[:len(msg)-1] |
| 464 |
} |
| 465 |
|
| 466 |
return msg |
| 467 |
} |
| 468 |
|
| 469 |
func marshalTree(tree *core.Tree) ([]byte, error) { |
| 470 |
/* use sonic for JSON marshaling (consistent with repo.go) */ |
| 471 |
return sonic.Marshal(tree) |
| 472 |
} |
| 473 |
|
| 474 |
func hashBytes(data []byte) string { |
| 475 |
/* use xxhash64 like the rest of larc */ |
| 476 |
return hash.Bytes(data) |
| 477 |
} |
| 478 |
|
| 479 |
// ImportFromGit is a convenience function for CLI |
| 480 |
func ImportFromGit(gitPath, larcPath string, verbose bool) error { |
| 481 |
/* resolve paths */ |
| 482 |
absGit, err := filepath.Abs(gitPath) |
| 483 |
if err != nil { |
| 484 |
return fmt.Errorf("resolve git path: %w", err) |
| 485 |
} |
| 486 |
|
| 487 |
absLarc, err := filepath.Abs(larcPath) |
| 488 |
if err != nil { |
| 489 |
return fmt.Errorf("resolve larc path: %w", err) |
| 490 |
} |
| 491 |
|
| 492 |
/* check git repo exists */ |
| 493 |
if _, err := os.Stat(filepath.Join(absGit, ".git")); os.IsNotExist(err) { |
| 494 |
return fmt.Errorf("not a git repository: %s", absGit) |
| 495 |
} |
| 496 |
|
| 497 |
/* check larc path doesn't exist */ |
| 498 |
if _, err := os.Stat(filepath.Join(absLarc, ".larc")); err == nil { |
| 499 |
return fmt.Errorf("larc repository already exists: %s", absLarc) |
| 500 |
} |
| 501 |
|
| 502 |
opts := Git2LarcOptions{ |
| 503 |
GitPath: absGit, |
| 504 |
LarcPath: absLarc, |
| 505 |
Verbose: verbose, |
| 506 |
} |
| 507 |
|
| 508 |
result, err := Git2Larc(opts) |
| 509 |
if err != nil { |
| 510 |
return err |
| 511 |
} |
| 512 |
|
| 513 |
fmt.Printf("Imported git repository to larc:\n") |
| 514 |
fmt.Printf(" Commits: %d\n", result.CommitsConverted) |
| 515 |
fmt.Printf(" Blobs: %d\n", result.BlobsConverted) |
| 516 |
fmt.Printf(" Branches: %d\n", result.BranchesConverted) |
| 517 |
|
| 518 |
return nil |
| 519 |
} |
| 520 |
|