Skip to content

Commit

Permalink
lsp: Implement bundle use in workspace Eval (#987)
Browse files Browse the repository at this point in the history
* WIP

* lsp/bundles: add a bundle cache

This cache can be used to maintain a cache of bundles in memory that are
found in the workspace.

Signed-off-by: Charlie Egan <[email protected]>

* lsp: Implement bundle use in workspace Eval

Signed-off-by: Charlie Egan <[email protected]>

* PR Feedback

---------

Signed-off-by: Charlie Egan <[email protected]>
  • Loading branch information
charlieegan3 authored Aug 15, 2024
1 parent 789fccf commit acbdb88
Show file tree
Hide file tree
Showing 7 changed files with 591 additions and 11 deletions.
1 change: 1 addition & 0 deletions .golangci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ linters:
- nolintlint
- depguard
- gomoddirectives # need replacements for wasip1
- execinquery # deprecated
linters-settings:
tagliatelle:
case:
Expand Down
35 changes: 35 additions & 0 deletions internal/lsp/bundles/bundles.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package bundles

import (
"fmt"
"os"
"path/filepath"
"slices"

"github.com/open-policy-agent/opa/bundle"
)

// LoadDataBundle reads the directory at path as a bundle, keeping only the
// manifest and data files (everything else is dropped by
// dataFileLoaderFilter). A ".manifest" file must exist directly inside path;
// otherwise an error is returned and nothing is read.
func LoadDataBundle(path string) (bundle.Bundle, error) {
	manifestPath := filepath.Join(path, ".manifest")

	// Any failure to stat the manifest is reported as the manifest being absent.
	if _, statErr := os.Stat(manifestPath); statErr != nil {
		return bundle.Bundle{}, fmt.Errorf("manifest file was not found at bundle path %q", path)
	}

	loader := bundle.NewDirectoryLoader(path).WithFilter(dataFileLoaderFilter)

	loaded, readErr := bundle.NewCustomReader(loader).Read()
	if readErr != nil {
		return bundle.Bundle{}, fmt.Errorf("failed to read bundle: %w", readErr)
	}

	return loaded, nil
}

// dataFileLoaderFilter is the loader filter used when reading data-only
// bundles. It returns true for entries that should be excluded: every regular
// file whose base name is not ".manifest", "data.json", "data.yml" or
// "data.yaml". Directories always yield false. The third argument is ignored.
func dataFileLoaderFilter(abspath string, info os.FileInfo, _ int) bool {
	if info.IsDir() {
		return false
	}

	keptNames := []string{".manifest", "data.json", "data.yml", "data.yaml"}
	isKept := slices.Contains(keptNames, filepath.Base(abspath))

	return !isKept
}
101 changes: 101 additions & 0 deletions internal/lsp/bundles/bundles_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package bundles

import (
"os"
"path/filepath"
"reflect"
"testing"
)

// TestLoadDataBundle writes a small workspace to a temporary directory for
// each case, loads the bundle rooted at testData.path and checks both the
// merged data document and that no Rego modules were loaded.
func TestLoadDataBundle(t *testing.T) {
	t.Parallel()

	testCases := map[string]struct {
		path         string
		files        map[string]string
		expectedData any
	}{
		"simple bundle": {
			path: "foo",
			files: map[string]string{
				"foo/.manifest": `{"roots":["foo"]}`,
				"foo/data.json": `{"foo": "bar"}`,
			},
			expectedData: map[string]any{
				"foo": "bar",
			},
		},
		"nested bundle": {
			path: "foo",
			files: map[string]string{
				"foo/.manifest":     `{"roots":["foo", "bar"]}`,
				"foo/data.yml":      `foo: bar`,
				"foo/bar/data.yaml": `bar: baz`,
			},
			expectedData: map[string]any{
				"foo": "bar",
				"bar": map[string]any{
					"bar": "baz",
				},
			},
		},
		"array data": {
			path: "foo",
			files: map[string]string{
				"foo/.manifest":     `{"roots":["bar"]}`,
				"foo/bar/data.json": `[{"foo": "bar"}]`,
			},
			expectedData: map[string]any{
				"bar": []any{
					map[string]any{
						"foo": "bar",
					},
				},
			},
		},
		"rego files": {
			path: "foo",
			files: map[string]string{
				"foo/.manifest": `{"roots":["foo"]}`,
				// A Rego file inside the bundle directory: this is the file
				// the loader filter has to drop for the "no modules"
				// assertion below to be meaningful.
				"foo/rego.rego": `package foo`,
				// A Rego file in a sibling directory that merely shares a
				// name prefix with the bundle directory; it must not be
				// picked up either.
				"food/rego.rego": `package foo`,
			},
			expectedData: map[string]any{},
		},
	}

	for testCase, testData := range testCases {
		t.Run(testCase, func(t *testing.T) {
			t.Parallel()

			workspacePath := t.TempDir()

			// create the workspace state
			for file, contents := range testData.files {
				filePath := filepath.Join(workspacePath, file)

				dir := filepath.Dir(filePath)
				if err := os.MkdirAll(dir, 0o755); err != nil {
					t.Fatalf("failed to create directory %s: %v", dir, err)
				}

				err := os.WriteFile(filePath, []byte(contents), 0o600)
				if err != nil {
					t.Fatalf("failed to write file %s: %v", filePath, err)
				}
			}

			b, err := LoadDataBundle(filepath.Join(workspacePath, testData.path))
			if err != nil {
				t.Fatal(err)
			}

			if !reflect.DeepEqual(b.Data, testData.expectedData) {
				t.Fatalf("expected data to be %v, but got %v", testData.expectedData, b.Data)
			}

			if len(b.Modules) != 0 {
				t.Fatalf("expected no modules, but got %d", len(b.Modules))
			}
		})
	}
}
235 changes: 235 additions & 0 deletions internal/lsp/bundles/cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
package bundles

import (
"bytes"
//nolint:gosec
"crypto/md5"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"

"github.com/open-policy-agent/opa/bundle"

"github.com/styrainc/regal/internal/util"
)

// Cache is a struct that maintains a number of bundles in memory and
// provides a way to refresh them when the source files change.
type Cache struct {
// workspacePath is the directory that Refresh walks when looking for
// bundles.
workspacePath string
// bundles holds the cached bundles, keyed by the bundle root that Refresh
// derives from the location of each ".manifest" file it finds.
bundles map[string]*cacheBundle
// errorLog, when non-nil, receives one line per bundle that fails to
// refresh.
errorLog io.Writer
}

// CacheOptions holds the settings that NewCache copies into a new Cache.
type CacheOptions struct {
// WorkspacePath is the directory that the cache walks to find bundles.
WorkspacePath string
// ErrorLog is optional; when set, bundle refresh failures are written to it.
ErrorLog io.Writer
}

// NewCache creates an empty Cache for the workspace described by opts.
//
// A non-empty WorkspacePath is stored with a trailing path separator. An
// empty WorkspacePath is kept empty, so that Refresh can reject it with
// "workspace path is empty" rather than treating it as the filesystem root.
// A nil opts is treated like a zero-value CacheOptions.
func NewCache(opts *CacheOptions) *Cache {
	if opts == nil {
		opts = &CacheOptions{}
	}

	workspacePath := opts.WorkspacePath

	// Only normalize real paths: appending a separator to "" would yield the
	// root directory and defeat the empty-path check in Refresh.
	if workspacePath != "" && !strings.HasSuffix(workspacePath, string(filepath.Separator)) {
		workspacePath += string(filepath.Separator)
	}

	return &Cache{
		workspacePath: workspacePath,
		bundles:       make(map[string]*cacheBundle),
		// A nil ErrorLog stays nil, which disables error logging.
		errorLog: opts.ErrorLog,
	}
}

// Refresh walks the workspace path and loads or refreshes any bundles that
// have changed since the last refresh.
//
// Every directory containing a ".manifest" file is treated as a bundle root;
// ".git" and ".idea" directories are not descended into. Roots are recorded
// relative to the workspace path (a manifest directly in the workspace
// directory yields the root "."). Bundles whose root no longer exists on disk
// are dropped from the cache.
//
// It returns the roots of the bundles that were (re)loaded during this call.
// A failure to refresh an individual bundle is written to the error log, if
// one is configured, and does not fail the call. An error is returned when the
// workspace path is empty or when the workspace walk itself fails.
func (c *Cache) Refresh() ([]string, error) {
	if c.workspacePath == "" {
		return nil, errors.New("workspace path is empty")
	}

	// find all the bundle roots that are currently present on disk
	var foundBundleRoots []string

	err := filepath.Walk(c.workspacePath, func(path string, info os.FileInfo, walkErr error) error {
		// Walk passes a nil info when it could not stat the entry (for
		// example a missing workspace directory); there is nothing to
		// inspect, so surface the accompanying error instead of
		// dereferencing nil.
		if info == nil {
			return walkErr
		}

		if info.IsDir() {
			if info.Name() == ".git" || info.Name() == ".idea" {
				return filepath.SkipDir
			}

			return nil
		}

		if filepath.Base(path) != ".manifest" {
			return nil
		}

		// Rel, unlike trimming the separator-terminated workspace prefix,
		// also handles a manifest located directly in the workspace
		// directory.
		root, relErr := filepath.Rel(c.workspacePath, filepath.Dir(path))
		if relErr != nil {
			return fmt.Errorf("failed to resolve bundle root for %q: %w", path, relErr)
		}

		foundBundleRoots = append(foundBundleRoots, root)

		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("failed to walk workspace path: %w", err)
	}

	var refreshedBundles []string

	// present records every root seen on disk so stale cache entries can be
	// detected with a single lookup each.
	present := make(map[string]struct{}, len(foundBundleRoots))

	// refresh any bundles that have changed
	for _, root := range foundBundleRoots {
		present[root] = struct{}{}

		cached, ok := c.bundles[root]
		if !ok {
			cached = &cacheBundle{}
			c.bundles[root] = cached
		}

		refreshed, err := cached.Refresh(filepath.Join(c.workspacePath, root))
		if err != nil {
			if c.errorLog != nil {
				fmt.Fprintf(c.errorLog, "failed to refresh bundle %q: %v\n", root, err)
			}

			continue
		}

		if refreshed {
			refreshedBundles = append(refreshedBundles, root)
		}
	}

	// remove any bundles that are no longer present on disk
	for root := range c.bundles {
		if _, ok := present[root]; !ok {
			delete(c.bundles, root)
		}
	}

	return refreshedBundles, nil
}

// List reports the roots of every bundle currently held in the cache, as
// produced by util.Keys over the internal bundle map.
func (c *Cache) List() []string {
	roots := util.Keys(c.bundles)

	return roots
}

// Get looks up the cached bundle stored under root. The boolean result is
// false, and the returned bundle is the zero value, when the cache has no
// entry for root.
func (c *Cache) Get(root string) (bundle.Bundle, bool) {
	if cached, found := c.bundles[root]; found {
		return cached.bundle, true
	}

	return bundle.Bundle{}, false
}

// All returns a newly allocated map from bundle root to bundle for every
// entry currently in the cache.
func (c *Cache) All() map[string]bundle.Bundle {
	snapshot := make(map[string]bundle.Bundle)

	for root := range c.bundles {
		snapshot[root] = c.bundles[root].bundle
	}

	return snapshot
}

// cacheBundle is an internal struct that holds a bundle.Bundle and the MD5
// hash of each source file in the bundle. Hashes are used to determine if
// the bundle should be reloaded.
type cacheBundle struct {
// bundle is the data bundle loaded from disk by Refresh.
bundle bundle.Bundle
// sourceDigests maps the path of each hashed source file to its MD5 digest;
// it is replaced after a successful reload.
sourceDigests map[string][]byte
}

// Refresh loads the bundle from disk and updates the cache if any of the
// source files have changed since the last refresh.
//
// path is the bundle root directory. Every manifest and data file below it is
// hashed, and the bundle is reloaded only when the set of files or any digest
// differs from what was recorded at the last successful reload.
//
// It returns true when the bundle was reloaded. When hashing or loading
// fails an error is returned and the previously cached bundle and digests are
// left untouched, so a temporarily invalid data file does not replace the
// last good bundle with an empty one.
func (c *cacheBundle) Refresh(path string) (bool, error) {
	onDiskSourceDigests := make(map[string][]byte)

	// walk the bundle path and calculate the MD5 hash of each file on disk
	// at the moment
	err := filepath.Walk(path, func(filePath string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		// only files that LoadDataBundle would actually read are hashed
		if info.IsDir() || dataFileLoaderFilter(filePath, info, 0) {
			return nil
		}

		hash, err := calculateMD5(filePath)
		if err != nil {
			return err
		}

		onDiskSourceDigests[filePath] = hash

		return nil
	})
	if err != nil {
		return false, fmt.Errorf("failed to walk bundle path %q: %w", path, err)
	}

	// compare the files on disk with the files that have been seen before
	// and return without reloading the bundle if there have been no changes
	if len(onDiskSourceDigests) == len(c.sourceDigests) {
		changed := false

		for filePath, hash := range onDiskSourceDigests {
			// a path not seen before yields a nil digest, which never
			// equals a real hash
			if !bytes.Equal(hash, c.sourceDigests[filePath]) {
				changed = true

				break
			}
		}

		if !changed {
			return false, nil
		}
	}

	// if there has been any change in any of the source files, then
	// reload the bundle; load into a local first so that a failed reload
	// cannot clobber the last good bundle
	reloaded, err := LoadDataBundle(path)
	if err != nil {
		return false, fmt.Errorf("failed to load bundle %q: %w", path, err)
	}

	// update the bundle and its sourceDigests to the new on-disk state
	// after a successful refresh
	c.bundle = reloaded
	c.sourceDigests = onDiskSourceDigests

	return true, nil
}

// calculateMD5 streams the file at filePath through an MD5 hasher and returns
// the raw digest bytes. The digest is only used for change detection. An
// error is returned if the file cannot be opened or read.
func calculateMD5(filePath string) ([]byte, error) {
	src, openErr := os.Open(filePath)
	if openErr != nil {
		return nil, fmt.Errorf("failed to open file %q: %w", filePath, openErr)
	}
	defer src.Close()

	//nolint:gosec
	digest := md5.New()

	_, copyErr := io.Copy(digest, src)
	if copyErr != nil {
		return nil, fmt.Errorf("failed to calculate MD5 hash for file %q: %w", filePath, copyErr)
	}

	return digest.Sum(nil), nil
}
Loading

0 comments on commit acbdb88

Please sign in to comment.