package status

import (
	"database/sql"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"runtime"
	"sync"

	_ "github.com/mattn/go-sqlite3"

	"larc.wejust.rest/larc/internal/core"
	"larc.wejust.rest/larc/internal/hash"
	"larc.wejust.rest/larc/internal/ignore"
	"larc.wejust.rest/larc/internal/repo"
)

/* Fast parallel scanner for working directory status.
 * Uses an mtime+size check first, then a hash for actual comparison.
 * Stores the index in SQLite for persistence.
 */

// ChangeType represents the type of file change.
type ChangeType int

// Kinds of change reported by Scanner.Scan.
const (
	Unchanged ChangeType = iota
	Added
	Modified
	Deleted
)

// String returns the lowercase name of the change type. Any value other
// than Added, Modified or Deleted is rendered as "unchanged".
func (t ChangeType) String() string {
	switch t {
	case Added:
		return "added"
	case Modified:
		return "modified"
	case Deleted:
		return "deleted"
	default:
		return "unchanged"
	}
}

// Change represents a single file change.
type Change struct {
	Path string     // path relative to the scanner root
	Type ChangeType // Added, Modified or Deleted
	Hash string     // new hash for added/modified; empty for deleted
}

// TrackedFile represents a file in the index (one row of the tracked table).
type TrackedFile struct {
	Path     string
	BlobHash string
	Mtime    int64 // modification time in Unix seconds
	Size     int64
	Mode     uint32
}

// StagedFile represents a staged change (one row of the staging table).
type StagedFile struct {
	Path     string
	Action   string // add, modify, delete
	BlobHash string
	Size     int64
	Mode     uint32
}

// Scanner scans the working directory for changes and maintains the
// SQLite-backed index and staging area.
type Scanner struct {
	repo      *repo.Repository
	db        *sql.DB // nil when the index database could not be opened
	root      string
	indexPath string
	ignore    *ignore.Matcher

	// Verbose enables progress output on stdout during Scan.
	Verbose bool
}

const indexSchema = `
CREATE TABLE IF NOT EXISTS tracked (
	path TEXT PRIMARY KEY,
	blob_hash TEXT,
	mtime INTEGER,
	size INTEGER,
	mode INTEGER
);
CREATE TABLE IF NOT EXISTS staging (
	path TEXT PRIMARY KEY,
	action TEXT NOT NULL,
	blob_hash TEXT,
	size INTEGER,
	mode INTEGER
);
`

// NewScanner creates a new scanner for the repository.
//
// The index lives in "index.db" inside r.Dir. If the database handle cannot
// be created the scanner is still returned, but without a database: Scan
// then treats every file as untracked and the staging/index methods become
// no-ops.
func NewScanner(r *repo.Repository) *Scanner {
	indexPath := filepath.Join(r.Dir, "index.db")

	db, err := sql.Open("sqlite3", indexPath+"?_journal_mode=WAL&_busy_timeout=5000")
	if err != nil {
		return &Scanner{repo: r, root: r.Root, ignore: ignore.New(r.Root)}
	}

	/* a single connection serialises all access to the index */
	db.SetMaxOpenConns(1)

	/* NewScanner has no error return, so a schema failure cannot be reported
	 * here; presumably it resurfaces from the first Stage/GetStagedEntries/
	 * SaveIndex query against the missing tables. */
	_, _ = db.Exec(indexSchema)

	return &Scanner{
		repo:      r,
		db:        db,
		root:      r.Root,
		indexPath: indexPath,
		ignore:    ignore.New(r.Root),
	}
}

// Close closes the underlying index database, if one is open.
func (s *Scanner) Close() error {
	if s.db != nil {
		return s.db.Close()
	}
	return nil
}

// Scan walks the working directory and returns the files that were added,
// modified or deleted relative to the tracked index.
//
// Files whose mtime (in seconds) and size both match the index are treated
// as unchanged without being read. Everything else is hashed, with at most
// runtime.NumCPU() hashes in flight. Entries that cannot be read or hashed
// are skipped silently. The order of the returned changes is not defined.
func (s *Scanner) Scan() ([]Change, error) {
	if s.Verbose {
		fmt.Println("loading tracked...")
	}
	tracked := s.loadTracked()
	if s.Verbose {
		fmt.Printf("%d tracked files\n", len(tracked))
	}

	var (
		changes   []Change
		mu        sync.Mutex /* guards changes */
		wg        sync.WaitGroup
		fileCount int
	)
	/* seen is only touched by the walker goroutine, so it needs no lock */
	seen := make(map[string]bool, len(tracked))
	/* bounds the number of concurrent hash workers */
	sem := make(chan struct{}, runtime.NumCPU())

	if s.Verbose {
		fmt.Println("scanning working directory...")
	}

	walkErr := filepath.WalkDir(s.root, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			/* unreadable entry: skip it and keep walking */
			return nil
		}

		relPath, err := filepath.Rel(s.root, path)
		if err != nil {
			return nil
		}

		/* check ignore patterns (.gitignore + .larcignore) */
		isDir := d.IsDir()
		if s.ignore.Match(relPath, isDir) {
			if isDir {
				return filepath.SkipDir
			}
			return nil
		}

		/* skip directories (we only track files) */
		if isDir {
			return nil
		}

		fileCount++
		if s.Verbose && fileCount%100 == 0 {
			fmt.Printf("DEBUG scan: walked %d files...\n", fileCount)
		}

		/* stat in the walker so workers never touch the DirEntry */
		info, err := d.Info()
		if err != nil {
			return nil
		}
		seen[relPath] = true

		/* fast path: same mtime and size means unchanged, no hashing needed */
		prev := tracked[relPath] /* nil when the file is not tracked */
		if prev != nil && info.ModTime().Unix() == prev.Mtime && info.Size() == prev.Size {
			return nil
		}

		/* slow path: hash in a worker. The slot is taken BEFORE spawning so
		 * the walker blocks instead of piling up goroutines. */
		sem <- struct{}{}
		wg.Add(1)
		go func(path, relPath string, prev *TrackedFile) {
			defer wg.Done()
			defer func() { <-sem }()

			h, err := hash.File(path)
			if err != nil {
				return
			}

			kind := Added
			if prev != nil {
				if h == prev.BlobHash {
					/* touched but content is identical */
					return
				}
				kind = Modified
			}

			mu.Lock()
			changes = append(changes, Change{Path: relPath, Type: kind, Hash: h})
			mu.Unlock()
		}(path, relPath, prev)

		return nil
	})

	if s.Verbose {
		fmt.Printf("walked %d files\n", fileCount)
	}

	wg.Wait()

	if walkErr != nil {
		return nil, fmt.Errorf("walk failed: %w", walkErr)
	}

	/* tracked files that the walk never saw are reported as deleted */
	for path := range tracked {
		if !seen[path] {
			changes = append(changes, Change{Path: path, Type: Deleted})
		}
	}

	return changes, nil
}

// loadTracked reads the tracked table into a map keyed by path.
//
// It is best-effort: without a database, or when the query fails, it returns
// an empty map, and rows that fail to scan are skipped. Files missing from
// the result are consequently reported as added by Scan.
func (s *Scanner) loadTracked() map[string]*TrackedFile {
	tracked := make(map[string]*TrackedFile)
	if s.db == nil {
		return tracked
	}

	rows, err := s.db.Query("SELECT path, blob_hash, mtime, size, mode FROM tracked")
	if err != nil {
		return tracked
	}
	defer rows.Close()

	for rows.Next() {
		t := &TrackedFile{}
		if err := rows.Scan(&t.Path, &t.BlobHash, &t.Mtime, &t.Size, &t.Mode); err != nil {
			continue
		}
		tracked[t.Path] = t
	}
	return tracked
}

// Stage stages a file for commit, replacing any earlier staging row for the
// same path. The row is always written with action 'add'. The mode is taken
// from the file on disk and falls back to 0644 when it cannot be stat'ed.
// Without a database this is a no-op.
func (s *Scanner) Stage(path, blobHash string, size int64) error {
	if s.db == nil {
		return nil
	}

	mode := uint32(0644)
	if info, err := os.Stat(filepath.Join(s.root, path)); err == nil {
		mode = uint32(info.Mode())
	}

	_, err := s.db.Exec(`
		INSERT OR REPLACE INTO staging (path, action, blob_hash, size, mode)
		VALUES (?, 'add', ?, ?, ?)
	`, path, blobHash, size, mode)
	return err
}

// StageDelete stages a file deletion, replacing any earlier staging row for
// the same path. Without a database this is a no-op.
func (s *Scanner) StageDelete(path string) error {
	if s.db == nil {
		return nil
	}

	_, err := s.db.Exec(`
		INSERT OR REPLACE INTO staging (path, action, blob_hash, size, mode)
		VALUES (?, 'delete', '', 0, 0)
	`, path)
	return err
}

// GetStagedEntries returns the entries of the current revision's tree with
// all staged changes applied, as a TreeEntry slice in no particular order.
// Without a database it returns nil, nil.
func (s *Scanner) GetStagedEntries() ([]core.TreeEntry, error) {
	if s.db == nil {
		return nil, nil
	}

	/* get current tree entries */
	/* NOTE(review): failures while loading the current tree are ignored and
	 * the staged changes are then applied on top of an empty tree. Confirm
	 * that this is intended for every error CurrentRevision/GetRevision/
	 * GetTree can return. */
	currentEntries := make(map[string]core.TreeEntry)
	rev, _ := s.repo.CurrentRevision()
	if rev > 0 {
		if r, err := s.repo.Meta.GetRevision(rev); err == nil {
			if tree, err := s.repo.GetTree(r.TreeHash); err == nil {
				for _, e := range tree.Entries {
					currentEntries[e.Path] = e
				}
			}
		}
	}

	/* apply staged changes */
	rows, err := s.db.Query("SELECT path, action, blob_hash, size, mode FROM staging")
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	for rows.Next() {
		var (
			path, action, blobHash string
			size                   int64
			mode                   uint32
		)
		/* a row that cannot be read would silently drop a staged change */
		if err := rows.Scan(&path, &action, &blobHash, &size, &mode); err != nil {
			return nil, fmt.Errorf("reading staged entry: %w", err)
		}

		if action == "delete" {
			delete(currentEntries, path)
			continue
		}
		currentEntries[path] = core.TreeEntry{
			Path:     path,
			Mode:     mode,
			Size:     size,
			BlobHash: blobHash,
			Kind:     core.EntryKindFile,
		}
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("reading staged entries: %w", err)
	}

	/* convert to slice */
	entries := make([]core.TreeEntry, 0, len(currentEntries))
	for _, e := range currentEntries {
		entries = append(entries, e)
	}
	return entries, nil
}

// ClearStaging clears all staged changes. Without a database this is a no-op.
func (s *Scanner) ClearStaging() error {
	if s.db == nil {
		return nil
	}
	_, err := s.db.Exec("DELETE FROM staging")
	return err
}

// SaveIndex brings the tracked table in line with the staged state: rows for
// staged deletions are removed and every entry returned by GetStagedEntries
// is upserted with the file's current mtime (0 when it cannot be stat'ed,
// which can never match on the next Scan and so forces a re-hash).
//
// All writes happen in one transaction; on any failure the transaction is
// rolled back and the error returned. Without a database this is a no-op.
func (s *Scanner) SaveIndex() error {
	if s.db == nil {
		return nil
	}

	/* Read everything BEFORE starting the transaction: the pool is limited
	 * to one connection, so a query through s.db would block while the
	 * transaction holds it. */
	deletePaths, err := s.stagedDeletions()
	if err != nil {
		return err
	}
	entries, err := s.GetStagedEntries()
	if err != nil {
		return err
	}

	tx, err := s.db.Begin()
	if err != nil {
		return err
	}
	/* undoes partial work on any early return; after a successful Commit it
	 * only returns sql.ErrTxDone, which is safe to ignore */
	defer func() { _ = tx.Rollback() }()

	/* handle deletions */
	for _, path := range deletePaths {
		if _, err := tx.Exec("DELETE FROM tracked WHERE path = ?", path); err != nil {
			return fmt.Errorf("removing %q from index: %w", path, err)
		}
	}

	/* update tracked table */
	upsert, err := tx.Prepare(`
		INSERT OR REPLACE INTO tracked (path, blob_hash, mtime, size, mode)
		VALUES (?, ?, ?, ?, ?)
	`)
	if err != nil {
		return fmt.Errorf("preparing index update: %w", err)
	}
	defer upsert.Close()

	for _, e := range entries {
		mtime := int64(0)
		if info, err := os.Stat(filepath.Join(s.root, e.Path)); err == nil {
			mtime = info.ModTime().Unix()
		}
		if _, err := upsert.Exec(e.Path, e.BlobHash, mtime, e.Size, e.Mode); err != nil {
			return fmt.Errorf("updating index entry %q: %w", e.Path, err)
		}
	}

	return tx.Commit()
}

// stagedDeletions returns the paths currently staged with action 'delete'.
func (s *Scanner) stagedDeletions() ([]string, error) {
	rows, err := s.db.Query("SELECT path FROM staging WHERE action = 'delete'")
	if err != nil {
		return nil, fmt.Errorf("listing staged deletions: %w", err)
	}
	defer rows.Close()

	var paths []string
	for rows.Next() {
		var path string
		if err := rows.Scan(&path); err != nil {
			return nil, fmt.Errorf("reading staged deletion: %w", err)
		}
		paths = append(paths, path)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("listing staged deletions: %w", err)
	}
	return paths, nil
}