-
Notifications
You must be signed in to change notification settings - Fork 38
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Brian McGee <[email protected]>
- Loading branch information
1 parent
51cd11a
commit d48855f
Showing
5 changed files
with
82 additions
and
249 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,200 +1,126 @@ | ||
package walk | ||
|
||
import ( | ||
"bufio" | ||
"context" | ||
"fmt" | ||
"github.com/charmbracelet/log" | ||
"github.com/numtide/treefmt/stats" | ||
"golang.org/x/sync/errgroup" | ||
"io" | ||
"io/fs" | ||
"os" | ||
"os/exec" | ||
"path/filepath" | ||
"runtime" | ||
"strings" | ||
|
||
"github.com/charmbracelet/log" | ||
"github.com/go-git/go-git/v5" | ||
"github.com/go-git/go-git/v5/plumbing/filemode" | ||
"github.com/numtide/treefmt/stats" | ||
"golang.org/x/sync/errgroup" | ||
) | ||
|
||
type GitReader struct { | ||
root string | ||
path string | ||
stats *stats.Stats | ||
batchSize int | ||
root string | ||
path string | ||
args []string | ||
|
||
log *log.Logger | ||
repo *git.Repository | ||
log *log.Logger | ||
stats *stats.Stats | ||
|
||
filesCh chan *File | ||
|
||
eg *errgroup.Group | ||
eg *errgroup.Group | ||
scanner *bufio.Scanner | ||
} | ||
|
||
func (g *GitReader) process() error { | ||
func (g *GitReader) Read(ctx context.Context, files []*File) (n int, err error) { | ||
// ensure we record how many files we traversed | ||
defer func() { | ||
close(g.filesCh) | ||
g.stats.Add(stats.Traversed, int32(n)) | ||
}() | ||
|
||
gitIndex, err := g.repo.Storer.Index() | ||
if err != nil { | ||
return fmt.Errorf("failed to open git index: %w", err) | ||
} | ||
|
||
// if we need to walk a path that is not the root of the repository, we will read the directory structure of the | ||
// git index into memory for faster lookups | ||
var idxCache *filetree | ||
|
||
path := filepath.Clean(filepath.Join(g.root, g.path)) | ||
if !strings.HasPrefix(path, g.root) { | ||
return fmt.Errorf("path '%s' is outside of the root '%s'", path, g.root) | ||
} | ||
|
||
switch path { | ||
|
||
case g.root: | ||
|
||
// we can just iterate the index entries | ||
for _, entry := range gitIndex.Entries { | ||
|
||
// we only want regular files, not directories or symlinks | ||
if entry.Mode == filemode.Dir || entry.Mode == filemode.Symlink { | ||
continue | ||
} | ||
|
||
// stat the file | ||
path := filepath.Join(g.root, entry.Name) | ||
|
||
info, err := os.Lstat(path) | ||
if os.IsNotExist(err) { | ||
// the underlying file might have been removed without the change being staged yet | ||
g.log.Warnf("Path %s is in the index but appears to have been removed from the filesystem", path) | ||
continue | ||
} else if err != nil { | ||
return fmt.Errorf("failed to stat %s: %w", path, err) | ||
} | ||
|
||
// determine a relative path | ||
relPath, err := filepath.Rel(g.root, path) | ||
if err != nil { | ||
return fmt.Errorf("failed to determine a relative path for %s: %w", path, err) | ||
} | ||
|
||
file := File{ | ||
Path: path, | ||
RelPath: relPath, | ||
Info: info, | ||
} | ||
|
||
g.stats.Add(stats.Traversed, 1) | ||
g.filesCh <- &file | ||
} | ||
|
||
default: | ||
|
||
// read the git index into memory if it hasn't already | ||
if idxCache == nil { | ||
idxCache = &filetree{name: ""} | ||
idxCache.readIndex(gitIndex) | ||
} | ||
|
||
// git index entries are relative to the repository root, so we need to determine a relative path for the | ||
// one we are currently processing before checking if it exists within the git index | ||
relPath, err := filepath.Rel(g.root, path) | ||
if err != nil { | ||
return fmt.Errorf("failed to find root relative path for %v: %w", path, err) | ||
} | ||
|
||
if !idxCache.hasPath(relPath) { | ||
log.Debugf("path %s not found in git index, skipping", relPath) | ||
return nil | ||
} | ||
|
||
err = filepath.Walk(path, func(path string, info fs.FileInfo, _ error) error { | ||
// skip directories | ||
if info.IsDir() { | ||
return nil | ||
} | ||
|
||
// determine a path relative to g.root before checking presence in the git index | ||
relPath, err := filepath.Rel(g.root, path) | ||
if err != nil { | ||
return fmt.Errorf("failed to determine a relative path for %s: %w", path, err) | ||
} | ||
|
||
if !idxCache.hasPath(relPath) { | ||
log.Debugf("path %v not found in git index, skipping", relPath) | ||
return nil | ||
} | ||
if g.scanner == nil { | ||
// create a pipe to capture the command output | ||
r, w := io.Pipe() | ||
|
||
file := File{ | ||
Path: path, | ||
RelPath: relPath, | ||
Info: info, | ||
} | ||
// create a command which will execute from the specified sub path within root | ||
cmd := exec.Command("git", g.args...) | ||
cmd.Dir = filepath.Join(g.root, g.path) | ||
cmd.Stdout = w | ||
|
||
g.stats.Add(stats.Traversed, 1) | ||
g.filesCh <- &file | ||
return nil | ||
// execute the command in the background | ||
g.eg.Go(func() error { | ||
return w.CloseWithError(cmd.Run()) | ||
}) | ||
if err != nil { | ||
return fmt.Errorf("failed to walk %s: %w", path, err) | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (g *GitReader) Read(ctx context.Context, files []*File) (n int, err error) { | ||
idx := 0 | ||
// create a new scanner for reading the output | ||
g.scanner = bufio.NewScanner(r) | ||
} | ||
|
||
LOOP: | ||
for idx < len(files) { | ||
|
||
for n < len(files) { | ||
select { | ||
|
||
// exit early if the context was cancelled | ||
case <-ctx.Done(): | ||
return 0, ctx.Err() | ||
case file, ok := <-g.filesCh: | ||
if !ok { | ||
return n, ctx.Err() | ||
|
||
default: | ||
// read the next file | ||
if g.scanner.Scan() { | ||
path := filepath.Join(g.root, g.path, g.scanner.Text()) | ||
|
||
g.log.Debugf("processing file: %s", path) | ||
|
||
info, err := os.Stat(path) | ||
if os.IsNotExist(err) { | ||
// the underlying file might have been removed | ||
g.log.Warnf( | ||
"Path %s is in the worktree but appears to have been removed from the filesystem", path, | ||
) | ||
continue | ||
} else if err != nil { | ||
return n, fmt.Errorf("failed to stat %s: %w", path, err) | ||
} | ||
|
||
files[n] = &File{ | ||
Path: path, | ||
RelPath: filepath.Join(g.path, g.scanner.Text()), | ||
Info: info, | ||
} | ||
n++ | ||
|
||
} else { | ||
// nothing more to read | ||
err = io.EOF | ||
break LOOP | ||
} | ||
files[idx] = file | ||
idx++ | ||
} | ||
} | ||
|
||
return idx, err | ||
return n, err | ||
} | ||
|
||
func (g *GitReader) Close() error { | ||
return g.eg.Wait() | ||
} | ||
|
||
func NewGitReader( | ||
func NewGitWorktreeReader( | ||
root string, | ||
path string, | ||
statz *stats.Stats, | ||
batchSize int, | ||
) (*GitReader, error) { | ||
repo, err := git.PlainOpen(root) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to open git repository: %w", err) | ||
} | ||
|
||
eg := &errgroup.Group{} | ||
|
||
r := &GitReader{ | ||
root: root, | ||
path: path, | ||
stats: statz, | ||
batchSize: batchSize, | ||
log: log.WithPrefix("walk[git]"), | ||
repo: repo, | ||
filesCh: make(chan *File, batchSize*runtime.NumCPU()), | ||
eg: eg, | ||
} | ||
// check if the root is a git repository | ||
cmd := exec.Command("git", "rev-parse", "--is-inside-work-tree") | ||
cmd.Dir = root | ||
|
||
eg.Go(r.process) | ||
if out, err := cmd.Output(); err != nil { | ||
return nil, fmt.Errorf("failed to check if git repository is inside work tree: %w", err) | ||
} else if strings.Trim(string(out), "\n") != "true" { | ||
return nil, fmt.Errorf("git repository is not inside work tree") | ||
} | ||
|
||
return r, nil | ||
return &GitReader{ | ||
root: root, | ||
path: path, | ||
args: []string{"ls-files"}, | ||
stats: statz, | ||
eg: &errgroup.Group{}, | ||
log: log.WithPrefix("walk[git]"), | ||
}, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.