Skip to content

Commit

Permalink
perf: walk workspace directories in parallel
Browse files Browse the repository at this point in the history
  • Loading branch information
jbedard committed Sep 13, 2024
1 parent 0890963 commit cc4bede
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 18 deletions.
1 change: 1 addition & 0 deletions walk/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ go_library(
"//flag",
"//rule",
"@com_github_bmatcuk_doublestar_v4//:doublestar",
"@org_golang_x_sync//errgroup",
],
)

Expand Down
3 changes: 0 additions & 3 deletions walk/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,6 @@ func getWalkConfig(c *config.Config) *walkConfig {
}

func (wc *walkConfig) isExcluded(rel, base string) bool {
if base == ".git" {
return true
}
return matchAnyGlob(wc.excludes, path.Join(rel, base))
}

Expand Down
85 changes: 70 additions & 15 deletions walk/walk.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ import (
"os"
"path"
"path/filepath"
"sort"
"strings"

"github.com/bazelbuild/bazel-gazelle/config"
"github.com/bazelbuild/bazel-gazelle/rule"
"golang.org/x/sync/errgroup"
)

// Mode determines which directories Walk visits and which directories
Expand Down Expand Up @@ -122,25 +124,26 @@ func Walk(c *config.Config, cexts []config.Configurer, dirs []string, mode Mode,
log.Printf("error loading .bazelignore: %v", err)
}

visit(c, cexts, isBazelIgnored, knownDirectives, updateRels, wf, c.RepoRoot, "", false)
}

func visit(c *config.Config, cexts []config.Configurer, isBazelIgnored isIgnoredFunc, knownDirectives map[string]bool, updateRels *UpdateFilter, wf WalkFunc, dir, rel string, updateParent bool) {
if isBazelIgnored(rel) {
return
trie, err := buildTrie(c, isBazelIgnored)
if err != nil {
log.Fatalf("error walking the file system: %v\n", err)
}

visit(c, cexts, knownDirectives, updateRels, trie, wf, c.RepoRoot, "", false)
}

func visit(c *config.Config, cexts []config.Configurer, knownDirectives map[string]bool, updateRels *UpdateFilter, trie *pathTrie, wf WalkFunc, dir, rel string, updateParent bool) {
haveError := false

// TODO: OPT: ReadDir stats all the files, which is slow. We just care about
// names and modes, so we should use something like
// golang.org/x/tools/internal/fastwalk to speed this up.
ents, err := os.ReadDir(dir)
if err != nil {
log.Print(err)
return
ents := make([]fs.DirEntry, 0, len(trie.children))
for _, node := range trie.children {
ents = append(ents, *node.entry)
}

sort.SliceStable(ents, func(i, j int) bool {
return ents[i].Name() < ents[j].Name()
})

f, err := loadBuildFile(c, rel, dir, ents)
if err != nil {
log.Print(err)
Expand All @@ -162,7 +165,7 @@ func visit(c *config.Config, cexts []config.Configurer, isBazelIgnored isIgnored
var subdirs, regularFiles []string
for _, ent := range ents {
base := ent.Name()
if isBazelIgnored(path.Join(rel, base)) || wc.isExcluded(rel, base) {
if wc.isExcluded(rel, base) {
continue
}
ent := resolveFileInfo(wc, dir, rel, ent)
Expand All @@ -179,7 +182,7 @@ func visit(c *config.Config, cexts []config.Configurer, isBazelIgnored isIgnored
shouldUpdate := updateRels.shouldUpdate(rel, updateParent)
for _, sub := range subdirs {
if subRel := path.Join(rel, sub); updateRels.shouldVisit(subRel, shouldUpdate) {
visit(c, cexts, isBazelIgnored, knownDirectives, updateRels, wf, filepath.Join(dir, sub), subRel, shouldUpdate)
visit(c, cexts, knownDirectives, updateRels, trie.children[sub], wf, path.Join(dir, sub), subRel, shouldUpdate)
}
}

Expand Down Expand Up @@ -356,3 +359,55 @@ func resolveFileInfo(wc *walkConfig, dir, rel string, ent fs.DirEntry) fs.DirEnt
}
return fs.FileInfoToDirEntry(fi)
}

// pathTrie is one node of an in-memory tree of file system entries, built by
// buildTrie/walkDir before the walk visits any directory.
type pathTrie struct {
// children maps an entry's base name to its node. walkDir allocates it for
// directory entries (and buildTrie for the root); newTrie leaves it nil.
children map[string]*pathTrie
// entry points at the fs.DirEntry this node was created from. The root node
// created by buildTrie has no entry (nil).
entry *fs.DirEntry
}

// newTrie returns a trie node for entry. Because entry is received by value,
// the node stores the address of its own copy rather than of a variable owned
// by the caller. The children map is left nil; callers set it when needed.
func newTrie(entry fs.DirEntry) *pathTrie {
	node := new(pathTrie)
	node.entry = &entry
	return node
}

// buildTrie walks the tree rooted at c.RepoRoot and returns a pathTrie
// describing every entry that walkDir keeps. The walk starts as a single task
// on an errgroup; walkDir schedules further tasks on the same group, and
// buildTrie blocks until all of them have finished. The returned error is the
// one reported by the group's Wait.
func buildTrie(c *config.Config, isIgnored isIgnoredFunc) (*pathTrie, error) {
	var group errgroup.Group

	root := &pathTrie{children: make(map[string]*pathTrie)}

	group.Go(func() error {
		return walkDir(c.RepoRoot, "", &group, isIgnored, root)
	})

	err := group.Wait()
	return root, err
}

// walkDir reads the directory at rel (a slash-separated path relative to root)
// and records every entry it keeps as a child of trie, keyed by the entry's
// base name. Entries named "" or ".git", and entries for which isIgnored
// reports true on their root-relative path, are skipped.
//
// Each kept subdirectory gets its own children map and is walked by a new task
// scheduled on eg, so the trie is only complete once eg.Wait has returned.
// An error from os.ReadDir is returned unchanged.
func walkDir(root, rel string, eg *errgroup.Group, isIgnored isIgnoredFunc, trie *pathTrie) error {
	// The directory being read is a file system path, so it is joined with
	// filepath (OS separators) rather than the slash-only path package.
	entries, err := os.ReadDir(filepath.Join(root, rel))
	if err != nil {
		return err
	}

	for _, entry := range entries {
		entryName := entry.Name()
		// The root-relative path stays slash-separated; it is what isIgnored
		// receives and what the recursive call takes as rel.
		entryPath := path.Join(rel, entryName)

		// Ignore .git, empty names and ignored paths
		if entryName == "" || entryName == ".git" || isIgnored(entryPath) {
			continue
		}

		entryTrie := newTrie(entry)
		trie.children[entryName] = entryTrie

		if entry.IsDir() {
			// Allocate the child's map before its task is scheduled, so the
			// task walking that directory finds it ready to fill.
			entryTrie.children = map[string]*pathTrie{}
			eg.Go(func() error {
				return walkDir(root, entryPath, eg, isIgnored, entryTrie)
			})
		}
	}
	return nil
}

0 comments on commit cc4bede

Please sign in to comment.