From f13bed16fc13607ced4269e5ff5b1ae13f48c9ec Mon Sep 17 00:00:00 2001 From: Parth Patel <88045217+pxp928@users.noreply.github.com> Date: Thu, 3 Oct 2024 17:26:24 -0400 Subject: [PATCH] update certifier with specific package queries to keep state (#2163) * add new QueryVulnPackagesList and implement ENT backend Signed-off-by: pxp928 * change to single query with enum Signed-off-by: pxp928 * combine queries to consolidate on backend Signed-off-by: pxp928 * add last-scan flag to certifier cli and update unit tests Signed-off-by: pxp928 * implement keyvalue backend implementation for QueryPackagesListForType Signed-off-by: pxp928 * update backend unit test for TestQueryPackagesListForType Signed-off-by: pxp928 * add check if links are not set in keyvalue backend Signed-off-by: pxp928 * update ent search to get latest vuln time to compare Signed-off-by: pxp928 * add license and empty tests for backend Signed-off-by: pxp928 * rename query and update comments Signed-off-by: pxp928 * update graphql schema comments for QueryType Signed-off-by: pxp928 * default last-scan to 4 hours in guac.yaml Signed-off-by: pxp928 * change cli default to 4 hours for last-scan Signed-off-by: pxp928 --------- Signed-off-by: pxp928 --- cmd/guaccollect/cmd/license.go | 19 +- cmd/guaccollect/cmd/osv.go | 19 +- cmd/guacone/cmd/license.go | 15 +- cmd/guacone/cmd/osv.go | 15 +- container_files/arango/guac.yaml | 3 + container_files/ent/guac.yaml | 3 + container_files/guac/guac.yaml | 3 + container_files/neo4j/guac.yaml | 3 + container_files/redis/guac.yaml | 3 + container_files/tikv/guac.yaml | 3 + guac.yaml | 3 + internal/testing/backend/main_test.go | 7 +- internal/testing/backend/search_test.go | 239 ++++++++++++++ internal/testing/cmd/pubsub_test/cmd/osv.go | 3 +- internal/testing/mocks/backend.go | 15 + pkg/assembler/backends/arangodb/search.go | 4 + pkg/assembler/backends/backends.go | 1 + pkg/assembler/backends/ent/backend/search.go | 123 +++++++ pkg/assembler/backends/keyvalue/search.go | 
213 +++++++++++++ pkg/assembler/backends/neo4j/search.go | 4 + pkg/assembler/clients/generated/operations.go | 301 ++++++++++++++++++ .../clients/operations/search.graphql | 17 + .../graphql/generated/artifact.generated.go | 217 +++++++++++++ .../graphql/generated/root_.generated.go | 35 ++ .../graphql/generated/search.generated.go | 10 + pkg/assembler/graphql/model/nodes.go | 46 +++ .../graphql/resolvers/search.resolvers.go | 5 + pkg/assembler/graphql/schema/search.graphql | 22 ++ .../components/root_package/root_package.go | 21 +- .../root_package/root_package_test.go | 56 ++-- pkg/cli/store.go | 2 + 31 files changed, 1377 insertions(+), 53 deletions(-) diff --git a/cmd/guaccollect/cmd/license.go b/cmd/guaccollect/cmd/license.go index 3ece5d69e8..a44d3e338f 100644 --- a/cmd/guaccollect/cmd/license.go +++ b/cmd/guaccollect/cmd/license.go @@ -23,6 +23,7 @@ import ( "time" "github.com/Khan/genqlient/graphql" + "github.com/guacsec/guac/pkg/assembler/clients/generated" "github.com/guacsec/guac/pkg/certifier" "github.com/guacsec/guac/pkg/certifier/certify" "github.com/guacsec/guac/pkg/certifier/clearlydefined" @@ -54,6 +55,9 @@ type cdOptions struct { addedLatency *time.Duration // sets the batch size for pagination query for the certifier batchSize int + // last time the scan was done in hours, if not set it will return + // all packages to check + lastScan *int } var cdCmd = &cobra.Command{ @@ -85,6 +89,7 @@ you have access to read and write to the respective blob store.`, viper.GetBool("publish-to-queue"), viper.GetString("certifier-latency"), viper.GetInt("certifier-batch-size"), + viper.GetInt("last-scan"), ) if err != nil { fmt.Printf("unable to validate flags: %v\n", err) @@ -103,7 +108,7 @@ you have access to read and write to the respective blob store.`, httpClient := http.Client{Transport: transport} gqlclient := graphql.NewClient(opts.graphqlEndpoint, &httpClient) - packageQueryFunc, err := getCDPackageQuery(gqlclient, opts.batchSize, opts.addedLatency) + 
packageQueryFunc, err := getCDPackageQuery(gqlclient, opts.batchSize, opts.addedLatency, opts.lastScan) if err != nil { logger.Errorf("error: %v", err) os.Exit(1) @@ -113,9 +118,9 @@ you have access to read and write to the respective blob store.`, }, } -func getCDPackageQuery(client graphql.Client, batchSize int, addedLatency *time.Duration) (func() certifier.QueryComponents, error) { +func getCDPackageQuery(client graphql.Client, batchSize int, addedLatency *time.Duration, lastScan *int) (func() certifier.QueryComponents, error) { return func() certifier.QueryComponents { - packageQuery := root_package.NewPackageQuery(client, batchSize, cdQuerySize, addedLatency) + packageQuery := root_package.NewPackageQuery(client, generated.QueryTypeLicense, batchSize, cdQuerySize, addedLatency, lastScan) return packageQuery }, nil } @@ -129,7 +134,7 @@ func validateCDFlags( poll bool, pubToQueue bool, certifierLatencyStr string, - batchSize int) (cdOptions, error) { + batchSize int, lastScan int) (cdOptions, error) { var opts cdOptions @@ -157,14 +162,16 @@ func validateCDFlags( } opts.batchSize = batchSize - + if lastScan != 0 { + opts.lastScan = &lastScan + } return opts, nil } func init() { set, err := cli.BuildFlags([]string{"interval", "header-file", "certifier-latency", - "certifier-batch-size"}) + "certifier-batch-size", "last-scan"}) if err != nil { fmt.Fprintf(os.Stderr, "failed to setup flag: %v", err) os.Exit(1) diff --git a/cmd/guaccollect/cmd/osv.go b/cmd/guaccollect/cmd/osv.go index 27d4875be3..98753c507e 100644 --- a/cmd/guaccollect/cmd/osv.go +++ b/cmd/guaccollect/cmd/osv.go @@ -27,6 +27,7 @@ import ( "time" "github.com/Khan/genqlient/graphql" + "github.com/guacsec/guac/pkg/assembler/clients/generated" "github.com/guacsec/guac/pkg/blob" "github.com/guacsec/guac/pkg/certifier" "github.com/guacsec/guac/pkg/certifier/certify" @@ -62,6 +63,9 @@ type osvOptions struct { addedLatency *time.Duration // sets the batch size for pagination query for the certifier 
batchSize int + // last time the scan was done in hours, if not set it will return + // all packages to check + lastScan *int } var osvCmd = &cobra.Command{ @@ -93,6 +97,7 @@ you have access to read and write to the respective blob store.`, viper.GetBool("publish-to-queue"), viper.GetString("certifier-latency"), viper.GetInt("certifier-batch-size"), + viper.GetInt("last-scan"), ) if err != nil { fmt.Printf("unable to validate flags: %v\n", err) @@ -111,7 +116,7 @@ you have access to read and write to the respective blob store.`, httpClient := http.Client{Transport: transport} gqlclient := graphql.NewClient(opts.graphqlEndpoint, &httpClient) - packageQueryFunc, err := getOSVPackageQuery(gqlclient, opts.batchSize, opts.addedLatency) + packageQueryFunc, err := getOSVPackageQuery(gqlclient, opts.batchSize, opts.addedLatency, opts.lastScan) if err != nil { logger.Errorf("error: %v", err) os.Exit(1) @@ -130,7 +135,7 @@ func validateOSVFlags( poll bool, pubToQueue bool, certifierLatencyStr string, - batchSize int) (osvOptions, error) { + batchSize int, lastScan int) (osvOptions, error) { var opts osvOptions @@ -158,7 +163,9 @@ func validateOSVFlags( } opts.batchSize = batchSize - + if lastScan != 0 { + opts.lastScan = &lastScan + } return opts, nil } @@ -168,9 +175,9 @@ func getCertifierPublish(ctx context.Context, blobStore *blob.BlobStore, pubsub }, nil } -func getOSVPackageQuery(client graphql.Client, batchSize int, addedLatency *time.Duration) (func() certifier.QueryComponents, error) { +func getOSVPackageQuery(client graphql.Client, batchSize int, addedLatency *time.Duration, lastScan *int) (func() certifier.QueryComponents, error) { return func() certifier.QueryComponents { - packageQuery := root_package.NewPackageQuery(client, batchSize, osvQuerySize, addedLatency) + packageQuery := root_package.NewPackageQuery(client, generated.QueryTypeVulnerability, batchSize, osvQuerySize, addedLatency, lastScan) return packageQuery }, nil } @@ -253,7 +260,7 @@ func 
initializeNATsandCertifier(ctx context.Context, blobAddr, pubsubAddr string func init() { set, err := cli.BuildFlags([]string{"interval", "header-file", "certifier-latency", - "certifier-batch-size"}) + "certifier-batch-size", "last-scan"}) if err != nil { fmt.Fprintf(os.Stderr, "failed to setup flag: %v", err) os.Exit(1) diff --git a/cmd/guacone/cmd/license.go b/cmd/guacone/cmd/license.go index 111eba8019..fc8d606bf7 100644 --- a/cmd/guacone/cmd/license.go +++ b/cmd/guacone/cmd/license.go @@ -27,6 +27,7 @@ import ( "time" "github.com/Khan/genqlient/graphql" + "github.com/guacsec/guac/pkg/assembler/clients/generated" "github.com/guacsec/guac/pkg/certifier" "github.com/guacsec/guac/pkg/certifier/certify" "github.com/guacsec/guac/pkg/certifier/clearlydefined" @@ -56,6 +57,9 @@ type cdOptions struct { addedLatency *time.Duration // sets the batch size for pagination query for the certifier batchSize int + // last time the scan was done in hours, if not set it will return + // all packages to check + lastScan *int } var cdCmd = &cobra.Command{ @@ -74,6 +78,7 @@ var cdCmd = &cobra.Command{ viper.GetBool("add-license-on-ingest"), viper.GetString("certifier-latency"), viper.GetInt("certifier-batch-size"), + viper.GetInt("last-scan"), ) if err != nil { fmt.Printf("unable to validate flags: %v\n", err) @@ -100,7 +105,7 @@ var cdCmd = &cobra.Command{ httpClient := http.Client{Transport: transport} gqlclient := graphql.NewClient(opts.graphqlEndpoint, &httpClient) - packageQuery := root_package.NewPackageQuery(gqlclient, opts.batchSize, cdQuerySize, opts.addedLatency) + packageQuery := root_package.NewPackageQuery(gqlclient, generated.QueryTypeLicense, opts.batchSize, cdQuerySize, opts.addedLatency, opts.lastScan) totalNum := 0 docChan := make(chan *processor.Document) @@ -230,7 +235,7 @@ func validateCDFlags( queryVulnIngestion bool, queryLicenseIngestion bool, certifierLatencyStr string, - batchSize int, + batchSize int, lastScan int, ) (cdOptions, error) { var opts 
cdOptions opts.graphqlEndpoint = graphqlEndpoint @@ -254,6 +259,10 @@ func validateCDFlags( opts.batchSize = batchSize + if lastScan != 0 { + opts.lastScan = &lastScan + } + csubOpts, err := csub_client.ValidateCsubClientFlags(csubAddr, csubTls, csubTlsSkipVerify) if err != nil { return opts, fmt.Errorf("unable to validate csub client flags: %w", err) @@ -267,7 +276,7 @@ func validateCDFlags( func init() { set, err := cli.BuildFlags([]string{"certifier-latency", - "certifier-batch-size"}) + "certifier-batch-size", "last-scan"}) if err != nil { fmt.Fprintf(os.Stderr, "failed to setup flag: %v", err) os.Exit(1) diff --git a/cmd/guacone/cmd/osv.go b/cmd/guacone/cmd/osv.go index 3936704839..2aa0dd3117 100644 --- a/cmd/guacone/cmd/osv.go +++ b/cmd/guacone/cmd/osv.go @@ -27,6 +27,7 @@ import ( "time" "github.com/Khan/genqlient/graphql" + "github.com/guacsec/guac/pkg/assembler/clients/generated" "github.com/guacsec/guac/pkg/certifier" "github.com/guacsec/guac/pkg/certifier/certify" "github.com/guacsec/guac/pkg/certifier/components/root_package" @@ -56,6 +57,9 @@ type osvOptions struct { addedLatency *time.Duration // sets the batch size for pagination query for the certifier batchSize int + // last time the scan was done in hours, if not set it will return + // all packages to check + lastScan *int } var osvCmd = &cobra.Command{ @@ -74,6 +78,7 @@ var osvCmd = &cobra.Command{ viper.GetBool("add-license-on-ingest"), viper.GetString("certifier-latency"), viper.GetInt("certifier-batch-size"), + viper.GetInt("last-scan"), ) if err != nil { fmt.Printf("unable to validate flags: %v\n", err) @@ -100,7 +105,7 @@ var osvCmd = &cobra.Command{ httpClient := http.Client{Transport: transport} gqlclient := graphql.NewClient(opts.graphqlEndpoint, &httpClient) - packageQuery := root_package.NewPackageQuery(gqlclient, opts.batchSize, osvQuerySize, opts.addedLatency) + packageQuery := root_package.NewPackageQuery(gqlclient, generated.QueryTypeVulnerability, opts.batchSize, osvQuerySize, 
opts.addedLatency, opts.lastScan) totalNum := 0 docChan := make(chan *processor.Document) @@ -231,7 +236,7 @@ func validateOSVFlags( queryVulnIngestion bool, queryLicenseIngestion bool, certifierLatencyStr string, - batchSize int, + batchSize int, lastScan int, ) (osvOptions, error) { var opts osvOptions opts.graphqlEndpoint = graphqlEndpoint @@ -255,6 +260,10 @@ func validateOSVFlags( opts.batchSize = batchSize + if lastScan != 0 { + opts.lastScan = &lastScan + } + csubOpts, err := csub_client.ValidateCsubClientFlags(csubAddr, csubTls, csubTlsSkipVerify) if err != nil { return opts, fmt.Errorf("unable to validate csub client flags: %w", err) @@ -268,7 +277,7 @@ func validateOSVFlags( func init() { set, err := cli.BuildFlags([]string{"certifier-latency", - "certifier-batch-size"}) + "certifier-batch-size", "last-scan"}) if err != nil { fmt.Fprintf(os.Stderr, "failed to setup flag: %v", err) os.Exit(1) diff --git a/container_files/arango/guac.yaml b/container_files/arango/guac.yaml index 89f3163dc7..71e7d5155d 100644 --- a/container_files/arango/guac.yaml +++ b/container_files/arango/guac.yaml @@ -23,6 +23,9 @@ use-csub: true poll: true interval: 5m +# number of hours since the last scan was run. 0 means run on all packages/sources +last-scan: 4 + # arangodb arango-user: root arango-pass: test123 diff --git a/container_files/ent/guac.yaml b/container_files/ent/guac.yaml index ef01f6daa3..0a0b5bdbbb 100644 --- a/container_files/ent/guac.yaml +++ b/container_files/ent/guac.yaml @@ -23,6 +23,9 @@ use-csub: true poll: true interval: 5m +# number of hours since the last scan was run. 
0 means run on all packages/sources +last-scan: 4 + # Ent config db-driver: postgres db-address: postgres://guac:guac@postgres:5432/guac?sslmode=disable diff --git a/container_files/guac/guac.yaml b/container_files/guac/guac.yaml index a8ec87d482..0a3e41b880 100644 --- a/container_files/guac/guac.yaml +++ b/container_files/guac/guac.yaml @@ -23,6 +23,9 @@ poll: true # certifier interval interval: 20m +# number of hours since the last scan was run. 0 means run on all packages/sources +last-scan: 4 + # set the batch size for the package pagination query certifier-batch-size: 60000 # add artificial latency to throttle the certifier diff --git a/container_files/neo4j/guac.yaml b/container_files/neo4j/guac.yaml index 17f5dd2d1d..39f3795153 100644 --- a/container_files/neo4j/guac.yaml +++ b/container_files/neo4j/guac.yaml @@ -23,6 +23,9 @@ use-csub: true poll: true interval: 5m +# number of hours since the last scan was run. 0 means run on all packages/sources +last-scan: 4 + # Neo4j details neo4j-user: neo4j neo4j-pass: s3cr3t diff --git a/container_files/redis/guac.yaml b/container_files/redis/guac.yaml index e4ed9838b6..dd3cbff1db 100644 --- a/container_files/redis/guac.yaml +++ b/container_files/redis/guac.yaml @@ -25,3 +25,6 @@ use-csub: true # certifier polling poll: true interval: 5m + +# number of hours since the last scan was run. 0 means run on all packages/sources +last-scan: 4 diff --git a/container_files/tikv/guac.yaml b/container_files/tikv/guac.yaml index e438c00c1a..c60e5f2861 100644 --- a/container_files/tikv/guac.yaml +++ b/container_files/tikv/guac.yaml @@ -25,3 +25,6 @@ use-csub: true # certifier polling poll: true interval: 5m + +# number of hours since the last scan was run. 
0 means run on all packages/sources +last-scan: 4 diff --git a/guac.yaml b/guac.yaml index 049903877f..3aab9de8bc 100644 --- a/guac.yaml +++ b/guac.yaml @@ -27,6 +27,9 @@ blob-addr: file:///tmp/blobstore?no_tmp_dir=true # certifier interval interval: 20m +# number of hours since the last scan was run. 0 means run on all packages/sources +last-scan: 4 + # set the batch size for the package pagination query certifier-batch-size: 60000 # add artificial latency to throttle the certifier diff --git a/internal/testing/backend/main_test.go b/internal/testing/backend/main_test.go index c2670302f1..5c16b5d74f 100644 --- a/internal/testing/backend/main_test.go +++ b/internal/testing/backend/main_test.go @@ -96,9 +96,10 @@ var skipMatrix = map[string]map[string]bool{ "TestVEXBulkIngest": {arango: true, redis: true}, "TestFindSoftware": {redis: true, arango: true}, // remove these once its implemented for the other backends - "TestDeleteCertifyVuln": {arango: true, memmap: true, redis: true, tikv: true}, - "TestDeleteHasSBOM": {arango: true, memmap: true, redis: true, tikv: true}, - "TestDeleteHasSLSAs": {arango: true, memmap: true, redis: true, tikv: true}, + "TestDeleteCertifyVuln": {arango: true, memmap: true, redis: true, tikv: true}, + "TestDeleteHasSBOM": {arango: true, memmap: true, redis: true, tikv: true}, + "TestDeleteHasSLSAs": {arango: true, memmap: true, redis: true, tikv: true}, + "TestQueryPackagesListForScan": {arango: true, redis: true, tikv: true}, } type backend interface { diff --git a/internal/testing/backend/search_test.go b/internal/testing/backend/search_test.go index 1307759096..d58f7d6742 100644 --- a/internal/testing/backend/search_test.go +++ b/internal/testing/backend/search_test.go @@ -20,8 +20,10 @@ package backend_test import ( "context" "testing" + "time" "github.com/google/go-cmp/cmp" + "github.com/guacsec/guac/internal/testing/ptrfrom" "github.com/guacsec/guac/internal/testing/testdata" "github.com/guacsec/guac/pkg/assembler/graphql/model" ) 
@@ -158,3 +160,240 @@ func TestFindSoftware(t *testing.T) { }) } } + +func TestQueryPackagesListForScan(t *testing.T) { + ctx := context.Background() + b := setupTest(t) + now := time.Now().UTC() + type vulnCall struct { + Pkg *model.PkgInputSpec + Vuln *model.VulnerabilityInputSpec + CertifyVuln *model.ScanMetadataInput + } + type licenseCall struct { + PkgSrc model.PackageOrSourceInput + Dec []*model.IDorLicenseInput + Dis []*model.IDorLicenseInput + Legal *model.CertifyLegalInputSpec + } + tests := []struct { + InPkg []*model.IDorPkgInput + Name string + InVuln []*model.VulnerabilityInputSpec + InLic []*model.LicenseInputSpec + VulnCall []vulnCall + LicenseCall []licenseCall + QueryType model.QueryType + ExpNodes []*model.Package + lastScan *int + }{ + { + Name: "last scan 2 hour, certifyVuln not created", + InPkg: []*model.IDorPkgInput{{PackageInput: testdata.P5}}, + lastScan: ptrfrom.Int(2), + QueryType: model.QueryTypeVulnerability, + ExpNodes: []*model.Package{testdata.P5out}, + }, + { + Name: "last scan 2 hour, timescanned 1 hours ago", + InVuln: []*model.VulnerabilityInputSpec{testdata.C1}, + InPkg: []*model.IDorPkgInput{{PackageInput: testdata.P2}}, + VulnCall: []vulnCall{ + { + Pkg: testdata.P2, + Vuln: testdata.C1, + CertifyVuln: &model.ScanMetadataInput{ + Collector: "test collector", + Origin: "test origin", + ScannerVersion: "v1.0.0", + ScannerURI: "test scanner uri", + DbVersion: "2023.01.01", + DbURI: "test db uri", + TimeScanned: now.Add(time.Duration(-1) * time.Hour).UTC(), + }, + }, + }, + lastScan: ptrfrom.Int(2), + QueryType: model.QueryTypeVulnerability, + ExpNodes: []*model.Package{testdata.P5out}, + }, + { + Name: "last scan 1 hour, timescanned 2 hours ago", + InVuln: []*model.VulnerabilityInputSpec{testdata.G1}, + InPkg: []*model.IDorPkgInput{{PackageInput: testdata.P2}}, + VulnCall: []vulnCall{ + { + Pkg: testdata.P2, + Vuln: testdata.G1, + CertifyVuln: &model.ScanMetadataInput{ + Collector: "test collector", + Origin: "test origin", + 
ScannerVersion: "v1.0.0", + ScannerURI: "test scanner uri 1", + DbVersion: "2023.08.01", + DbURI: "test db uri", + TimeScanned: now.Add(time.Duration(-5) * time.Hour).UTC(), + }, + }, + }, + lastScan: ptrfrom.Int(1), + QueryType: model.QueryTypeVulnerability, + ExpNodes: []*model.Package{testdata.P2out, testdata.P5out}, + }, + { + Name: "last scan 4 hour, timescanned 4 hours ago", + InVuln: []*model.VulnerabilityInputSpec{testdata.G1}, + InPkg: []*model.IDorPkgInput{{PackageInput: testdata.P3}}, + VulnCall: []vulnCall{ + { + Pkg: testdata.P3, + Vuln: testdata.G1, + CertifyVuln: &model.ScanMetadataInput{ + Collector: "test collector", + Origin: "test origin", + ScannerVersion: "v1.0.0", + ScannerURI: "test scanner uri", + DbVersion: "2023.01.01", + DbURI: "test db uri", + TimeScanned: now.Add(time.Duration(-4) * time.Hour).UTC(), + }, + }, + }, + lastScan: ptrfrom.Int(4), + QueryType: model.QueryTypeVulnerability, + ExpNodes: []*model.Package{testdata.P3out, testdata.P5out}, + }, + { + Name: "last scan 1 hour, multiple packages, one package over 24 hours to not include", + InVuln: []*model.VulnerabilityInputSpec{testdata.NoVulnInput, testdata.C1}, + InPkg: []*model.IDorPkgInput{{PackageInput: testdata.P4}, {PackageInput: testdata.P1}}, + VulnCall: []vulnCall{ + { + Pkg: testdata.P4, + Vuln: testdata.NoVulnInput, + CertifyVuln: &model.ScanMetadataInput{ + Collector: "test collector", + Origin: "test origin", + ScannerVersion: "v1.0.0", + ScannerURI: "test scanner uri", + DbVersion: "2023.01.01", + DbURI: "test db uri", + TimeScanned: now.Add(time.Duration(-25) * time.Hour).UTC(), + }, + }, + { + Pkg: testdata.P4, + Vuln: testdata.NoVulnInput, + CertifyVuln: &model.ScanMetadataInput{ + Collector: "test collector", + Origin: "test origin", + ScannerVersion: "v1.0.0", + ScannerURI: "test scanner uri", + DbVersion: "2023.01.01", + DbURI: "test db uri", + TimeScanned: now.Add(time.Duration(-4) * time.Hour).UTC(), + }, + }, + { + Pkg: testdata.P1, + Vuln: testdata.C1, + 
CertifyVuln: &model.ScanMetadataInput{ + Collector: "test collector", + Origin: "test origin", + ScannerVersion: "v1.0.0", + ScannerURI: "test scanner uri", + DbVersion: "2023.01.01", + DbURI: "test db uri", + TimeScanned: now.Add(time.Duration(-4) * time.Hour).UTC(), + }, + }, + }, + lastScan: ptrfrom.Int(3), + QueryType: model.QueryTypeVulnerability, + ExpNodes: []*model.Package{testdata.P5out, testdata.P4out, testdata.P1out, testdata.P3out}, + }, + { + Name: "License - last scan 1 hour, multiple packages, one package over 24 hours to not include", + InPkg: []*model.IDorPkgInput{{PackageInput: testdata.P4}, {PackageInput: testdata.P1}}, + InLic: []*model.LicenseInputSpec{testdata.L1}, + LicenseCall: []licenseCall{ + { + PkgSrc: model.PackageOrSourceInput{ + Package: &model.IDorPkgInput{PackageInput: testdata.P1}, + }, + Dec: []*model.IDorLicenseInput{{LicenseInput: testdata.L1}}, + Legal: &model.CertifyLegalInputSpec{ + Justification: "test justification", + TimeScanned: now.Add(time.Duration(-25) * time.Hour).UTC(), + }, + }, + { + PkgSrc: model.PackageOrSourceInput{ + Package: &model.IDorPkgInput{PackageInput: testdata.P1}, + }, + Dec: []*model.IDorLicenseInput{{LicenseInput: testdata.L1}}, + Legal: &model.CertifyLegalInputSpec{ + Justification: "test justification", + TimeScanned: now.Add(time.Duration(-4) * time.Hour).UTC(), + }, + }, + { + PkgSrc: model.PackageOrSourceInput{ + Package: &model.IDorPkgInput{PackageInput: testdata.P2}, + }, + Dec: []*model.IDorLicenseInput{{LicenseInput: testdata.L1}}, + Legal: &model.CertifyLegalInputSpec{ + Justification: "test justification", + TimeScanned: now.Add(time.Duration(-2) * time.Hour).UTC(), + }, + }, + }, + lastScan: ptrfrom.Int(3), + QueryType: model.QueryTypeLicense, + ExpNodes: []*model.Package{testdata.P1out, testdata.P3out, testdata.P4out, testdata.P5out}, + }, + } + for _, test := range tests { + t.Run(test.Name, func(t *testing.T) { + for _, g := range test.InVuln { + if _, err := 
b.IngestVulnerability(ctx, model.IDorVulnerabilityInput{VulnerabilityInput: g}); err != nil { + t.Fatalf("Could not ingest vulnerability: %a", err) + } + } + if _, err := b.IngestPackages(ctx, test.InPkg); err != nil { + t.Fatalf("Could not ingest packages: %v", err) + } + for _, o := range test.VulnCall { + _, err := b.IngestCertifyVuln(ctx, model.IDorPkgInput{PackageInput: o.Pkg}, model.IDorVulnerabilityInput{VulnerabilityInput: o.Vuln}, *o.CertifyVuln) + if err != nil { + t.Fatalf("did not get expected ingest error, want: %v", err) + } + } + + for _, a := range test.InLic { + if _, err := b.IngestLicense(ctx, &model.IDorLicenseInput{LicenseInput: a}); err != nil { + t.Fatalf("Could not ingest license: %v", err) + } + } + for _, o := range test.LicenseCall { + _, err := b.IngestCertifyLegal(ctx, o.PkgSrc, o.Dec, o.Dis, o.Legal) + if err != nil { + t.Fatalf("did not get expected ingest error: %v", err) + } + } + got, err := b.QueryPackagesListForScan(ctx, model.PkgSpec{}, test.QueryType, test.lastScan, nil, ptrfrom.Int(10)) + if err != nil { + t.Fatalf("did not get expected query error: %v", err) + } + var returnedObjects []*model.Package + if got != nil { + for _, obj := range got.Edges { + returnedObjects = append(returnedObjects, obj.Node) + } + } + if diff := cmp.Diff(test.ExpNodes, returnedObjects, commonOpts); diff != "" { + t.Errorf("Unexpected results. 
(-want +got):\n%s", diff) + } + }) + } +} diff --git a/internal/testing/cmd/pubsub_test/cmd/osv.go b/internal/testing/cmd/pubsub_test/cmd/osv.go index 0661f341b4..ca9dae6d23 100644 --- a/internal/testing/cmd/pubsub_test/cmd/osv.go +++ b/internal/testing/cmd/pubsub_test/cmd/osv.go @@ -27,6 +27,7 @@ import ( "time" "github.com/Khan/genqlient/graphql" + "github.com/guacsec/guac/pkg/assembler/clients/generated" "github.com/guacsec/guac/pkg/blob" "github.com/guacsec/guac/pkg/certifier" "github.com/guacsec/guac/pkg/certifier/certify" @@ -92,7 +93,7 @@ func getCertifierPublish(ctx context.Context, blobStore *blob.BlobStore, pubsub func getPackageQuery(client graphql.Client) (func() certifier.QueryComponents, error) { return func() certifier.QueryComponents { - packageQuery := root_package.NewPackageQuery(client, 60000, 999, nil) + packageQuery := root_package.NewPackageQuery(client, generated.QueryTypeVulnerability, 60000, 999, nil, nil) return packageQuery }, nil } diff --git a/internal/testing/mocks/backend.go b/internal/testing/mocks/backend.go index a92936efae..d4c408b652 100644 --- a/internal/testing/mocks/backend.go +++ b/internal/testing/mocks/backend.go @@ -1390,6 +1390,21 @@ func (mr *MockBackendMockRecorder) PointOfContactList(ctx, pointOfContactSpec, a return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PointOfContactList", reflect.TypeOf((*MockBackend)(nil).PointOfContactList), ctx, pointOfContactSpec, after, first) } +// QueryPackagesListForScan mocks base method. 
+func (m *MockBackend) QueryPackagesListForScan(ctx context.Context, pkgSpec model.PkgSpec, queryType model.QueryType, lastScan *int, after *string, first *int) (*model.PackageConnection, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "QueryPackagesListForScan", ctx, pkgSpec, queryType, lastScan, after, first) + ret0, _ := ret[0].(*model.PackageConnection) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// QueryPackagesListForScan indicates an expected call of QueryPackagesListForScan. +func (mr *MockBackendMockRecorder) QueryPackagesListForScan(ctx, pkgSpec, queryType, lastScan, after, first any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "QueryPackagesListForScan", reflect.TypeOf((*MockBackend)(nil).QueryPackagesListForScan), ctx, pkgSpec, queryType, lastScan, after, first) +} + // Scorecards mocks base method. func (m *MockBackend) Scorecards(ctx context.Context, certifyScorecardSpec *model.CertifyScorecardSpec) ([]*model.CertifyScorecard, error) { m.ctrl.T.Helper() diff --git a/pkg/assembler/backends/arangodb/search.go b/pkg/assembler/backends/arangodb/search.go index e2f138a7cd..04fc166862 100644 --- a/pkg/assembler/backends/arangodb/search.go +++ b/pkg/assembler/backends/arangodb/search.go @@ -27,6 +27,10 @@ func (c *arangoClient) FindSoftwareList(ctx context.Context, searchText string, return nil, fmt.Errorf("not implemented: FindSoftwareList") } +func (c *arangoClient) QueryPackagesListForScan(ctx context.Context, pkgSpec model.PkgSpec, queryType model.QueryType, lastInterval *int, after *string, first *int) (*model.PackageConnection, error) { + return nil, fmt.Errorf("not implemented: QueryPackagesListForScan") +} + // TODO(lumjjb): add source when it is implemented in arango backend func (c *arangoClient) FindSoftware(ctx context.Context, searchText string) ([]model.PackageSourceOrArtifact, error) { diff --git a/pkg/assembler/backends/backends.go b/pkg/assembler/backends/backends.go index 
3052b9f41a..3d3b48975f 100644 --- a/pkg/assembler/backends/backends.go +++ b/pkg/assembler/backends/backends.go @@ -141,6 +141,7 @@ type Backend interface { // Search queries: queries to help find data in GUAC based on text search FindSoftware(ctx context.Context, searchText string) ([]model.PackageSourceOrArtifact, error) FindSoftwareList(ctx context.Context, searchText string, after *string, first *int) (*model.FindSoftwareConnection, error) + QueryPackagesListForScan(ctx context.Context, pkgSpec model.PkgSpec, queryType model.QueryType, lastScan *int, after *string, first *int) (*model.PackageConnection, error) } // BackendArgs interface allows each backend to specify the arguments needed to diff --git a/pkg/assembler/backends/ent/backend/search.go b/pkg/assembler/backends/ent/backend/search.go index e5ff5406cc..cd54b532dd 100644 --- a/pkg/assembler/backends/ent/backend/search.go +++ b/pkg/assembler/backends/ent/backend/search.go @@ -18,9 +18,16 @@ package backend import ( "context" "fmt" + "time" + "entgo.io/contrib/entgql" + "entgo.io/ent/dialect/sql" + "github.com/google/uuid" + "github.com/guacsec/guac/internal/testing/ptrfrom" "github.com/guacsec/guac/pkg/assembler/backends/ent" "github.com/guacsec/guac/pkg/assembler/backends/ent/artifact" + "github.com/guacsec/guac/pkg/assembler/backends/ent/certifylegal" + "github.com/guacsec/guac/pkg/assembler/backends/ent/certifyvuln" "github.com/guacsec/guac/pkg/assembler/backends/ent/packagename" "github.com/guacsec/guac/pkg/assembler/backends/ent/packageversion" "github.com/guacsec/guac/pkg/assembler/backends/ent/sourcename" @@ -100,3 +107,119 @@ func (b *EntBackend) FindSoftware(ctx context.Context, searchText string) ([]mod func (b *EntBackend) FindSoftwareList(ctx context.Context, searchText string, after *string, first *int) (*model.FindSoftwareConnection, error) { return nil, fmt.Errorf("not implemented: FindSoftwareList") } + +func (b *EntBackend) QueryPackagesListForScan(ctx context.Context, pkgSpec 
model.PkgSpec, queryType model.QueryType, lastScan *int, after *string, first *int) (*model.PackageConnection, error) { + var afterCursor *entgql.Cursor[uuid.UUID] + + if after != nil { + globalID := fromGlobalID(*after) + if globalID.nodeType != packageversion.Table { + return nil, fmt.Errorf("after cursor is not type packageversion but type: %s", globalID.nodeType) + } + afterUUID, err := uuid.Parse(globalID.id) + if err != nil { + return nil, fmt.Errorf("failed to parse global ID with error: %w", err) + } + afterCursor = &ent.Cursor{ID: afterUUID} + } else { + afterCursor = nil + } + + var pkgConn *ent.PackageVersionConnection + if lastScan == nil { + var err error + pkgQuery := b.client.PackageVersion.Query(). + Where(packageQueryPredicates(&pkgSpec)) + + pkgConn, err = pkgQuery. + WithName(func(q *ent.PackageNameQuery) {}). + Paginate(ctx, afterCursor, first, nil, nil) + + if err != nil { + return nil, fmt.Errorf("failed package query with error: %w", err) + } + } else { + var pkgLatestScan []struct { + ID uuid.UUID `json:"id"` + LastScanTimeDB time.Time `json:"max"` + } + + if queryType == model.QueryTypeVulnerability { + err := b.client.PackageVersion.Query(). + Where(packageQueryPredicates(&pkgSpec)). + GroupBy(packageversion.FieldID). // Group by Package ID + Aggregate(func(s *sql.Selector) string { + t := sql.Table(certifyvuln.Table) + s.LeftJoin(t).On(s.C(packageversion.FieldID), t.C(certifyvuln.PackageColumn)) + return sql.As(sql.Max(t.C(certifyvuln.FieldTimeScanned)), "max") + }). + Scan(ctx, &pkgLatestScan) + + if err != nil { + return nil, fmt.Errorf("failed package query with error: %w", err) + } + } else { + err := b.client.PackageVersion.Query(). + Where(packageQueryPredicates(&pkgSpec)). + GroupBy(packageversion.FieldID). 
// Group by Package ID + Aggregate(func(s *sql.Selector) string { + t := sql.Table(certifylegal.Table) + s.LeftJoin(t).On(s.C(packageversion.FieldID), t.C(certifylegal.PackageColumn)) + return sql.As(sql.Max(t.C(certifylegal.FieldTimeScanned)), "max") + }). + Scan(ctx, &pkgLatestScan) + + if err != nil { + return nil, fmt.Errorf("failed package query with error: %w", err) + } + } + + lastScanTime := time.Now().Add(time.Duration(-*lastScan) * time.Hour).UTC() + var packagesThatNeedScanning []uuid.UUID + for _, record := range pkgLatestScan { + if record.LastScanTimeDB.Before(lastScanTime) { + packagesThatNeedScanning = append(packagesThatNeedScanning, record.ID) // Add the package ID + } + } + + if len(packagesThatNeedScanning) > 0 { + var queryErr error + pkgConn, queryErr = b.client.PackageVersion.Query(). + Where(packageversion.IDIn(packagesThatNeedScanning...)). + WithName(func(q *ent.PackageNameQuery) {}). + Paginate(ctx, afterCursor, first, nil, nil) + + if queryErr != nil { + return nil, fmt.Errorf("failed package query with error: %w", queryErr) + } + } + } + + // if not found return nil + if pkgConn == nil { + return nil, nil + } + + var edges []*model.PackageEdge + for _, edge := range pkgConn.Edges { + edges = append(edges, &model.PackageEdge{ + Cursor: pkgVersionGlobalID(edge.Cursor.ID.String()), + Node: toModelPackage(backReferencePackageVersion(edge.Node)), + }) + } + + if pkgConn.PageInfo.StartCursor != nil { + return &model.PackageConnection{ + TotalCount: pkgConn.TotalCount, + PageInfo: &model.PageInfo{ + HasNextPage: pkgConn.PageInfo.HasNextPage, + StartCursor: ptrfrom.String(pkgVersionGlobalID(pkgConn.PageInfo.StartCursor.ID.String())), + EndCursor: ptrfrom.String(pkgVersionGlobalID(pkgConn.PageInfo.EndCursor.ID.String())), + }, + Edges: edges, + }, nil + } else { + // if not found return nil + return nil, nil + } +} diff --git a/pkg/assembler/backends/keyvalue/search.go b/pkg/assembler/backends/keyvalue/search.go index 40daf0b26c..b896ef52ad 
100644 --- a/pkg/assembler/backends/keyvalue/search.go +++ b/pkg/assembler/backends/keyvalue/search.go @@ -18,8 +18,11 @@ package keyvalue import ( "context" "fmt" + "sort" "strings" + "time" + "github.com/guacsec/guac/internal/testing/ptrfrom" "github.com/guacsec/guac/pkg/assembler/graphql/model" ) @@ -255,3 +258,213 @@ func (c *demoClient) searchPkgVersion(ctx context.Context, pkgNameNode *pkgName, return pvs } + +func (c *demoClient) QueryPackagesListForScan(ctx context.Context, pkgSpec model.PkgSpec, queryType model.QueryType, lastScan *int, after *string, first *int) (*model.PackageConnection, error) { + c.m.RLock() + defer c.m.RUnlock() + + edges := make([]*model.PackageEdge, 0) + hasNextPage := false + numNodes := 0 + totalCount := 0 + addToCount := 0 + + currentPage := false + + // If no cursor present start from the top + if after == nil { + currentPage = true + } + + var done bool + scn := c.kv.Keys(pkgTypeCol) + for !done { + var typeKeys []string + var err error + typeKeys, done, err = scn.Scan(ctx) + if err != nil { + return nil, err + } + + sort.Strings(typeKeys) + totalCount = len(typeKeys) + + for i, tk := range typeKeys { + pkgTypeNode, err := byKeykv[*pkgType](ctx, pkgTypeCol, tk, c) + if err != nil { + return nil, err + } + pNamespaces := []*model.PackageNamespace{} + for _, nsID := range pkgTypeNode.Namespaces { + pkgNS, err := byIDkv[*pkgNamespace](ctx, nsID, c) + if err != nil { + continue + } + pns := []*model.PackageName{} + for _, nameID := range pkgNS.Names { + pkgNameNode, err := byIDkv[*pkgName](ctx, nameID, c) + if err != nil { + continue + } + pvs := []*model.PackageVersion{} + for _, verID := range pkgNameNode.Versions { + pkgVer, err := byIDkv[*pkgVersion](ctx, verID, c) + if err != nil { + continue + } + if queryType == model.QueryTypeVulnerability { + if len(pkgVer.CertifyVulnLinks) > 0 { + var timeScanned []time.Time + for _, certVulnID := range pkgVer.CertifyVulnLinks { + link, err := byIDkv[*certifyVulnerabilityLink](ctx, 
certVulnID, c) + if err != nil { + continue + } + timeScanned = append(timeScanned, link.TimeScanned) + } + lastScanTime := latestTime(timeScanned) + // lastScan is optional: nil means no re-scan interval, so always include the package instead of dereferencing nil + if lastScan == nil || lastScanTime.Before(time.Now().Add(time.Duration(-*lastScan) * time.Hour).UTC()) { + pvs = append(pvs, &model.PackageVersion{ + ID: pkgVer.ThisID, + Version: pkgVer.Version, + Subpath: pkgVer.Subpath, + Qualifiers: getCollectedPackageQualifiers(pkgVer.Qualifiers), + }) + } + } else { + pvs = append(pvs, &model.PackageVersion{ + ID: pkgVer.ThisID, + Version: pkgVer.Version, + Subpath: pkgVer.Subpath, + Qualifiers: getCollectedPackageQualifiers(pkgVer.Qualifiers), + }) + } + } else { + if len(pkgVer.CertifyLegals) > 0 { + var timeScanned []time.Time + for _, certLegalID := range pkgVer.CertifyLegals { + link, err := byIDkv[*certifyLegalStruct](ctx, certLegalID, c) + if err != nil { + continue + } + timeScanned = append(timeScanned, link.TimeScanned) + } + lastScanTime := latestTime(timeScanned) + // lastScan is optional: nil means no re-scan interval, so always include the package instead of dereferencing nil + if lastScan == nil || lastScanTime.Before(time.Now().Add(time.Duration(-*lastScan) * time.Hour).UTC()) { + pvs = append(pvs, &model.PackageVersion{ + ID: pkgVer.ThisID, + Version: pkgVer.Version, + Subpath: pkgVer.Subpath, + Qualifiers: getCollectedPackageQualifiers(pkgVer.Qualifiers), + }) + } + } else { + pvs = append(pvs, &model.PackageVersion{ + ID: pkgVer.ThisID, + Version: pkgVer.Version, + Subpath: pkgVer.Subpath, + Qualifiers: getCollectedPackageQualifiers(pkgVer.Qualifiers), + }) + } + } + } + if len(pvs) > 0 { + pns = append(pns, &model.PackageName{ + ID: pkgNameNode.ThisID, + Name: pkgNameNode.Name, + Versions: pvs, + }) + } + } + if len(pns) > 0 { + pNamespaces = append(pNamespaces, &model.PackageNamespace{ + ID: pkgNS.ThisID, + Namespace: pkgNS.Namespace, + Names: pns, + }) + } + } + + for _, namespace := range pNamespaces { + for _, name := range namespace.Names { + for _, version := range name.Versions { + p := &model.Package{ + ID: pkgTypeNode.ThisID, 
+ Type: pkgTypeNode.Type, + Namespaces: []*model.PackageNamespace{ + { + ID: namespace.ID, + Namespace: namespace.Namespace, + Names: []*model.PackageName{ + { + ID: name.ID, + Name: name.Name, + Versions: []*model.PackageVersion{ + version, + }, + }, + }, + }, + }, + } + + if after != nil && !currentPage { + if p.Namespaces[0].Names[0].Versions[0].ID == *after { + totalCount = len(typeKeys) - (i + 1) + currentPage = true + } + continue + } + + if first != nil { + if numNodes < *first { + edges = append(edges, &model.PackageEdge{ + Cursor: p.Namespaces[0].Names[0].Versions[0].ID, + Node: p, + }) + numNodes++ + } else if numNodes == *first { + hasNextPage = true + } + } else { + edges = append(edges, &model.PackageEdge{ + Cursor: p.Namespaces[0].Names[0].Versions[0].ID, + Node: p, + }) + } + } + } + } + } + } + + if len(edges) != 0 { + return &model.PackageConnection{ + TotalCount: totalCount + addToCount, + PageInfo: &model.PageInfo{ + HasNextPage: hasNextPage, + StartCursor: ptrfrom.String(edges[0].Node.ID), + EndCursor: ptrfrom.String(edges[max(numNodes-1, 0)].Node.ID), + }, + Edges: edges, + }, nil + } + return nil, nil +} + +// Get the latest time from a slice of time.Time +func latestTime(times []time.Time) time.Time { + if len(times) == 0 { + return time.Time{} // Return zero value of time.Time if slice is empty + } + + latest := times[0] // Initialize with the first time in the slice + for _, t := range times { + if t.After(latest) { + latest = t + } + } + return latest +} diff --git a/pkg/assembler/backends/neo4j/search.go b/pkg/assembler/backends/neo4j/search.go index 5562285a65..396cf870db 100644 --- a/pkg/assembler/backends/neo4j/search.go +++ b/pkg/assembler/backends/neo4j/search.go @@ -29,3 +29,7 @@ func (c *neo4jClient) FindSoftware(ctx context.Context, searchText string) ([]mo func (c *neo4jClient) FindSoftwareList(ctx context.Context, searchText string, after *string, first *int) (*model.FindSoftwareConnection, error) { return nil, fmt.Errorf("not 
implemented: FindSoftwareList") } + +func (c *neo4jClient) QueryPackagesListForScan(ctx context.Context, pkgSpec model.PkgSpec, queryType model.QueryType, lastInterval *int, after *string, first *int) (*model.PackageConnection, error) { + return nil, fmt.Errorf("not implemented: QueryPackagesListForScan") +} diff --git a/pkg/assembler/clients/generated/operations.go b/pkg/assembler/clients/generated/operations.go index 4715187982..e1ddbdd6b7 100644 --- a/pkg/assembler/clients/generated/operations.go +++ b/pkg/assembler/clients/generated/operations.go @@ -27257,6 +27257,207 @@ func (v *PointOfContactsResponse) GetPointOfContact() []PointOfContactsPointOfCo return v.PointOfContact } +// QueryPackagesListForScanQueryPackagesListForScanPackageConnection includes the requested fields of the GraphQL type PackageConnection. +// The GraphQL type's documentation follows. +// +// PackageConnection returns the paginated results for Package. +// +// totalCount is the total number of results returned. +// +// pageInfo provides information to the client if there is +// a next page of results and the starting and +// ending cursor for the current set. +// +// edges contains the PackageEdge which contains the current cursor +// and the Package node itself +type QueryPackagesListForScanQueryPackagesListForScanPackageConnection struct { + TotalCount int `json:"totalCount"` + Edges []QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdge `json:"edges"` + PageInfo QueryPackagesListForScanQueryPackagesListForScanPackageConnectionPageInfo `json:"pageInfo"` +} + +// GetTotalCount returns QueryPackagesListForScanQueryPackagesListForScanPackageConnection.TotalCount, and is useful for accessing the field via an interface. 
+func (v *QueryPackagesListForScanQueryPackagesListForScanPackageConnection) GetTotalCount() int { + return v.TotalCount +} + +// GetEdges returns QueryPackagesListForScanQueryPackagesListForScanPackageConnection.Edges, and is useful for accessing the field via an interface. +func (v *QueryPackagesListForScanQueryPackagesListForScanPackageConnection) GetEdges() []QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdge { + return v.Edges +} + +// GetPageInfo returns QueryPackagesListForScanQueryPackagesListForScanPackageConnection.PageInfo, and is useful for accessing the field via an interface. +func (v *QueryPackagesListForScanQueryPackagesListForScanPackageConnection) GetPageInfo() QueryPackagesListForScanQueryPackagesListForScanPackageConnectionPageInfo { + return v.PageInfo +} + +// QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdge includes the requested fields of the GraphQL type PackageEdge. +// The GraphQL type's documentation follows. +// +// PackageEdge contains the cursor for the resulting node and +// the Package node itself. +type QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdge struct { + Cursor string `json:"cursor"` + Node QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage `json:"node"` +} + +// GetCursor returns QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdge.Cursor, and is useful for accessing the field via an interface. +func (v *QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdge) GetCursor() string { + return v.Cursor +} + +// GetNode returns QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdge.Node, and is useful for accessing the field via an interface. 
+func (v *QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdge) GetNode() QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage { + return v.Node +} + +// QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage includes the requested fields of the GraphQL type Package. +// The GraphQL type's documentation follows. +// +// Package represents the root of the package trie/tree. +// +// We map package information to a trie, closely matching the pURL specification +// (https://github.com/package-url/purl-spec/blob/0dd92f26f8bb11956ffdf5e8acfcee71e8560407/README.rst), +// but deviating from it where GUAC heuristics allow for better representation of +// package information. Each path in the trie fully represents a package; we split +// the trie based on the pURL components. +// +// This node matches a pkg: partial pURL. The type field matches the +// pURL types but we might also use "guac" for the cases where the pURL +// representation is not complete or when we have custom rules. +// +// Since this node is at the root of the package trie, it is named Package, not +// PackageType. +type QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage struct { + AllPkgTree `json:"-"` +} + +// GetId returns QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage.Id, and is useful for accessing the field via an interface. +func (v *QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage) GetId() string { + return v.AllPkgTree.Id +} + +// GetType returns QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage.Type, and is useful for accessing the field via an interface. 
+func (v *QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage) GetType() string { + return v.AllPkgTree.Type +} + +// GetNamespaces returns QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage.Namespaces, and is useful for accessing the field via an interface. +func (v *QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage) GetNamespaces() []AllPkgTreeNamespacesPackageNamespace { + return v.AllPkgTree.Namespaces +} + +func (v *QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage) UnmarshalJSON(b []byte) error { + + if string(b) == "null" { + return nil + } + + var firstPass struct { + *QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage + graphql.NoUnmarshalJSON + } + firstPass.QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage = v + + err := json.Unmarshal(b, &firstPass) + if err != nil { + return err + } + + err = json.Unmarshal( + b, &v.AllPkgTree) + if err != nil { + return err + } + return nil +} + +type __premarshalQueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage struct { + Id string `json:"id"` + + Type string `json:"type"` + + Namespaces []AllPkgTreeNamespacesPackageNamespace `json:"namespaces"` +} + +func (v *QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage) MarshalJSON() ([]byte, error) { + premarshaled, err := v.__premarshalJSON() + if err != nil { + return nil, err + } + return json.Marshal(premarshaled) +} + +func (v *QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage) __premarshalJSON() (*__premarshalQueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage, error) { + var retval 
__premarshalQueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage + + retval.Id = v.AllPkgTree.Id + retval.Type = v.AllPkgTree.Type + retval.Namespaces = v.AllPkgTree.Namespaces + return &retval, nil +} + +// QueryPackagesListForScanQueryPackagesListForScanPackageConnectionPageInfo includes the requested fields of the GraphQL type PageInfo. +// The GraphQL type's documentation follows. +// +// PageInfo serves the client information about the paginated query results. +// +// hasNextPage is true when there are results to be returned. +// +// hasPreviousPage is true when there is a previous page to return to. +// +// startCursor is the ID where the query started from. +// +// endCursor is where the query ended. +type QueryPackagesListForScanQueryPackagesListForScanPackageConnectionPageInfo struct { + StartCursor *string `json:"startCursor"` + EndCursor *string `json:"endCursor"` + HasNextPage bool `json:"hasNextPage"` +} + +// GetStartCursor returns QueryPackagesListForScanQueryPackagesListForScanPackageConnectionPageInfo.StartCursor, and is useful for accessing the field via an interface. +func (v *QueryPackagesListForScanQueryPackagesListForScanPackageConnectionPageInfo) GetStartCursor() *string { + return v.StartCursor +} + +// GetEndCursor returns QueryPackagesListForScanQueryPackagesListForScanPackageConnectionPageInfo.EndCursor, and is useful for accessing the field via an interface. +func (v *QueryPackagesListForScanQueryPackagesListForScanPackageConnectionPageInfo) GetEndCursor() *string { + return v.EndCursor +} + +// GetHasNextPage returns QueryPackagesListForScanQueryPackagesListForScanPackageConnectionPageInfo.HasNextPage, and is useful for accessing the field via an interface. +func (v *QueryPackagesListForScanQueryPackagesListForScanPackageConnectionPageInfo) GetHasNextPage() bool { + return v.HasNextPage +} + +// QueryPackagesListForScanResponse is returned by QueryPackagesListForScan on success. 
+type QueryPackagesListForScanResponse struct { + // queryPackagesListForScan returns a paginated results via PackageConnection + // for all packages that need to be re-scanned (based on the last scan in hours) + // or have never been scanned. + // + // queryType is used to specify if the last time scanned is checked for either + // certifyVuln or certifyLegal. + QueryPackagesListForScan *QueryPackagesListForScanQueryPackagesListForScanPackageConnection `json:"queryPackagesListForScan"` +} + +// GetQueryPackagesListForScan returns QueryPackagesListForScanResponse.QueryPackagesListForScan, and is useful for accessing the field via an interface. +func (v *QueryPackagesListForScanResponse) GetQueryPackagesListForScan() *QueryPackagesListForScanQueryPackagesListForScanPackageConnection { + return v.QueryPackagesListForScan +} + +// QueryType is used in conjunction with queryPackagesListForScan to +// specify if the last time scanned is checked for either certifyVuln +// or certifyLegal. +type QueryType string + +const ( + // direct dependency + QueryTypeVulnerability QueryType = "VULNERABILITY" + // indirect dependency + QueryTypeLicense QueryType = "LICENSE" +) + // SLSAInputSpec is the same as SLSA but for mutation input. type SLSAInputSpec struct { BuildType string `json:"buildType"` @@ -31128,6 +31329,30 @@ type __PointOfContactsInput struct { // GetFilter returns __PointOfContactsInput.Filter, and is useful for accessing the field via an interface. func (v *__PointOfContactsInput) GetFilter() PointOfContactSpec { return v.Filter } +// __QueryPackagesListForScanInput is used internally by genqlient +type __QueryPackagesListForScanInput struct { + Filter PkgSpec `json:"filter"` + QueryType QueryType `json:"queryType"` + LastScan *int `json:"lastScan"` + After *string `json:"after"` + First *int `json:"first"` +} + +// GetFilter returns __QueryPackagesListForScanInput.Filter, and is useful for accessing the field via an interface. 
+func (v *__QueryPackagesListForScanInput) GetFilter() PkgSpec { return v.Filter } + +// GetQueryType returns __QueryPackagesListForScanInput.QueryType, and is useful for accessing the field via an interface. +func (v *__QueryPackagesListForScanInput) GetQueryType() QueryType { return v.QueryType } + +// GetLastScan returns __QueryPackagesListForScanInput.LastScan, and is useful for accessing the field via an interface. +func (v *__QueryPackagesListForScanInput) GetLastScan() *int { return v.LastScan } + +// GetAfter returns __QueryPackagesListForScanInput.After, and is useful for accessing the field via an interface. +func (v *__QueryPackagesListForScanInput) GetAfter() *string { return v.After } + +// GetFirst returns __QueryPackagesListForScanInput.First, and is useful for accessing the field via an interface. +func (v *__QueryPackagesListForScanInput) GetFirst() *int { return v.First } + // __ScorecardsInput is used internally by genqlient type __ScorecardsInput struct { Filter CertifyScorecardSpec `json:"filter"` @@ -38912,6 +39137,82 @@ func PointOfContacts( return &data_, err_ } +// The query or mutation executed by QueryPackagesListForScan. +const QueryPackagesListForScan_Operation = ` +query QueryPackagesListForScan ($filter: PkgSpec!, $queryType: QueryType!, $lastScan: Int, $after: ID, $first: Int) { + queryPackagesListForScan(pkgSpec: $filter, queryType: $queryType, lastScan: $lastScan, after: $after, first: $first) { + totalCount + edges { + cursor + node { + ... 
AllPkgTree + } + } + pageInfo { + startCursor + endCursor + hasNextPage + } + } +} +fragment AllPkgTree on Package { + id + type + namespaces { + id + namespace + names { + id + name + versions { + id + purl + version + qualifiers { + key + value + } + subpath + } + } + } +} +` + +func QueryPackagesListForScan( + ctx_ context.Context, + client_ graphql.Client, + filter PkgSpec, + queryType QueryType, + lastScan *int, + after *string, + first *int, +) (*QueryPackagesListForScanResponse, error) { + req_ := &graphql.Request{ + OpName: "QueryPackagesListForScan", + Query: QueryPackagesListForScan_Operation, + Variables: &__QueryPackagesListForScanInput{ + Filter: filter, + QueryType: queryType, + LastScan: lastScan, + After: after, + First: first, + }, + } + var err_ error + + var data_ QueryPackagesListForScanResponse + resp_ := &graphql.Response{Data: &data_} + + err_ = client_.MakeRequest( + ctx_, + req_, + resp_, + ) + + return &data_, err_ +} + // The query or mutation executed by Scorecards. const Scorecards_Operation = ` query Scorecards ($filter: CertifyScorecardSpec!) { diff --git a/pkg/assembler/clients/operations/search.graphql b/pkg/assembler/clients/operations/search.graphql index de71d9bb61..98f667f4e2 100644 --- a/pkg/assembler/clients/operations/search.graphql +++ b/pkg/assembler/clients/operations/search.graphql @@ -27,3 +27,20 @@ query FindSoftware($searchText: String!) 
{ } } } + +query QueryPackagesListForScan($filter: PkgSpec!, $queryType: QueryType!, $lastScan: Int, $after: ID, $first: Int) { + queryPackagesListForScan(pkgSpec: $filter, queryType: $queryType, lastScan: $lastScan, after: $after, first: $first) { + totalCount + edges { + cursor + node { + ...AllPkgTree + } + } + pageInfo { + startCursor + endCursor + hasNextPage + } + } +} diff --git a/pkg/assembler/graphql/generated/artifact.generated.go b/pkg/assembler/graphql/generated/artifact.generated.go index 11d88d68a2..56709da2ef 100644 --- a/pkg/assembler/graphql/generated/artifact.generated.go +++ b/pkg/assembler/graphql/generated/artifact.generated.go @@ -113,6 +113,7 @@ type QueryResolver interface { PkgEqualList(ctx context.Context, pkgEqualSpec model.PkgEqualSpec, after *string, first *int) (*model.PkgEqualConnection, error) FindSoftware(ctx context.Context, searchText string) ([]model.PackageSourceOrArtifact, error) FindSoftwareList(ctx context.Context, searchText string, after *string, first *int) (*model.FindSoftwareConnection, error) + QueryPackagesListForScan(ctx context.Context, pkgSpec model.PkgSpec, queryType model.QueryType, lastScan *int, after *string, first *int) (*model.PackageConnection, error) Sources(ctx context.Context, sourceSpec model.SourceSpec) ([]*model.Source, error) SourcesList(ctx context.Context, sourceSpec model.SourceSpec, after *string, first *int) (*model.SourceConnection, error) VulnEqual(ctx context.Context, vulnEqualSpec model.VulnEqualSpec) ([]*model.VulnEqual, error) @@ -6144,6 +6145,146 @@ func (ec *executionContext) field_Query_path_argsUsingOnly( return zeroVal, nil } +func (ec *executionContext) field_Query_queryPackagesListForScan_args(ctx context.Context, rawArgs map[string]interface{}) (map[string]interface{}, error) { + var err error + args := map[string]interface{}{} + arg0, err := ec.field_Query_queryPackagesListForScan_argsPkgSpec(ctx, rawArgs) + if err != nil { + return nil, err + } + args["pkgSpec"] = arg0 + arg1, err 
:= ec.field_Query_queryPackagesListForScan_argsQueryType(ctx, rawArgs) + if err != nil { + return nil, err + } + args["queryType"] = arg1 + arg2, err := ec.field_Query_queryPackagesListForScan_argsLastScan(ctx, rawArgs) + if err != nil { + return nil, err + } + args["lastScan"] = arg2 + arg3, err := ec.field_Query_queryPackagesListForScan_argsAfter(ctx, rawArgs) + if err != nil { + return nil, err + } + args["after"] = arg3 + arg4, err := ec.field_Query_queryPackagesListForScan_argsFirst(ctx, rawArgs) + if err != nil { + return nil, err + } + args["first"] = arg4 + return args, nil +} +func (ec *executionContext) field_Query_queryPackagesListForScan_argsPkgSpec( + ctx context.Context, + rawArgs map[string]interface{}, +) (model.PkgSpec, error) { + // We won't call the directive if the argument is null. + // Set call_argument_directives_with_null to true to call directives + // even if the argument is null. + _, ok := rawArgs["pkgSpec"] + if !ok { + var zeroVal model.PkgSpec + return zeroVal, nil + } + + ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("pkgSpec")) + if tmp, ok := rawArgs["pkgSpec"]; ok { + return ec.unmarshalNPkgSpec2githubᚗcomᚋguacsecᚋguacᚋpkgᚋassemblerᚋgraphqlᚋmodelᚐPkgSpec(ctx, tmp) + } + + var zeroVal model.PkgSpec + return zeroVal, nil +} + +func (ec *executionContext) field_Query_queryPackagesListForScan_argsQueryType( + ctx context.Context, + rawArgs map[string]interface{}, +) (model.QueryType, error) { + // We won't call the directive if the argument is null. + // Set call_argument_directives_with_null to true to call directives + // even if the argument is null. 
+ _, ok := rawArgs["queryType"] + if !ok { + var zeroVal model.QueryType + return zeroVal, nil + } + + ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("queryType")) + if tmp, ok := rawArgs["queryType"]; ok { + return ec.unmarshalNQueryType2githubᚗcomᚋguacsecᚋguacᚋpkgᚋassemblerᚋgraphqlᚋmodelᚐQueryType(ctx, tmp) + } + + var zeroVal model.QueryType + return zeroVal, nil +} + +func (ec *executionContext) field_Query_queryPackagesListForScan_argsLastScan( + ctx context.Context, + rawArgs map[string]interface{}, +) (*int, error) { + // We won't call the directive if the argument is null. + // Set call_argument_directives_with_null to true to call directives + // even if the argument is null. + _, ok := rawArgs["lastScan"] + if !ok { + var zeroVal *int + return zeroVal, nil + } + + ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("lastScan")) + if tmp, ok := rawArgs["lastScan"]; ok { + return ec.unmarshalOInt2ᚖint(ctx, tmp) + } + + var zeroVal *int + return zeroVal, nil +} + +func (ec *executionContext) field_Query_queryPackagesListForScan_argsAfter( + ctx context.Context, + rawArgs map[string]interface{}, +) (*string, error) { + // We won't call the directive if the argument is null. + // Set call_argument_directives_with_null to true to call directives + // even if the argument is null. + _, ok := rawArgs["after"] + if !ok { + var zeroVal *string + return zeroVal, nil + } + + ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("after")) + if tmp, ok := rawArgs["after"]; ok { + return ec.unmarshalOID2ᚖstring(ctx, tmp) + } + + var zeroVal *string + return zeroVal, nil +} + +func (ec *executionContext) field_Query_queryPackagesListForScan_argsFirst( + ctx context.Context, + rawArgs map[string]interface{}, +) (*int, error) { + // We won't call the directive if the argument is null. + // Set call_argument_directives_with_null to true to call directives + // even if the argument is null. 
+ _, ok := rawArgs["first"] + if !ok { + var zeroVal *int + return zeroVal, nil + } + + ctx = graphql.WithPathContext(ctx, graphql.NewPathWithField("first")) + if tmp, ok := rawArgs["first"]; ok { + return ec.unmarshalOInt2ᚖint(ctx, tmp) + } + + var zeroVal *int + return zeroVal, nil +} + func (ec *executionContext) field_Query_scorecardsList_args(ctx context.Context, rawArgs map[string]interface{}) (map[string]interface{}, error) { var err error args := map[string]interface{}{} @@ -12317,6 +12458,63 @@ func (ec *executionContext) fieldContext_Query_findSoftwareList(ctx context.Cont return fc, nil } +func (ec *executionContext) _Query_queryPackagesListForScan(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Query_queryPackagesListForScan(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp := ec._fieldMiddleware(ctx, nil, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return ec.resolvers.Query().QueryPackagesListForScan(rctx, fc.Args["pkgSpec"].(model.PkgSpec), fc.Args["queryType"].(model.QueryType), fc.Args["lastScan"].(*int), fc.Args["after"].(*string), fc.Args["first"].(*int)) + }) + + if resTmp == nil { + return graphql.Null + } + res := resTmp.(*model.PackageConnection) + fc.Result = res + return ec.marshalOPackageConnection2ᚖgithubᚗcomᚋguacsecᚋguacᚋpkgᚋassemblerᚋgraphqlᚋmodelᚐPackageConnection(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_Query_queryPackagesListForScan(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Query", + Field: field, + IsMethod: true, + IsResolver: true, + Child: func(ctx context.Context, field graphql.CollectedField) 
(*graphql.FieldContext, error) { + switch field.Name { + case "totalCount": + return ec.fieldContext_PackageConnection_totalCount(ctx, field) + case "pageInfo": + return ec.fieldContext_PackageConnection_pageInfo(ctx, field) + case "edges": + return ec.fieldContext_PackageConnection_edges(ctx, field) + } + return nil, fmt.Errorf("no field named %q was found under type PackageConnection", field.Name) + }, + } + defer func() { + if r := recover(); r != nil { + err = ec.Recover(ctx, r) + ec.Error(ctx, err) + } + }() + ctx = graphql.WithFieldContext(ctx, fc) + if fc.Args, err = ec.field_Query_queryPackagesListForScan_args(ctx, field.ArgumentMap(ec.Variables)); err != nil { + ec.Error(ctx, err) + return fc, err + } + return fc, nil +} + func (ec *executionContext) _Query_sources(ctx context.Context, field graphql.CollectedField) (ret graphql.Marshaler) { fc, err := ec.fieldContext_Query_sources(ctx, field) if err != nil { @@ -14503,6 +14701,25 @@ func (ec *executionContext) _Query(ctx context.Context, sel ast.SelectionSet) gr func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) } + out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) }) + case "queryPackagesListForScan": + field := field + + innerFunc := func(ctx context.Context, _ *graphql.FieldSet) (res graphql.Marshaler) { + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + } + }() + res = ec._Query_queryPackagesListForScan(ctx, field) + return res + } + + rrm := func(ctx context.Context) graphql.Marshaler { + return ec.OperationContext.RootResolverMiddleware(ctx, + func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) + } + out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return rrm(innerCtx) }) case "sources": field := field diff --git a/pkg/assembler/graphql/generated/root_.generated.go b/pkg/assembler/graphql/generated/root_.generated.go index 5879fc8baf..9da6f7740b 100644 --- 
a/pkg/assembler/graphql/generated/root_.generated.go +++ b/pkg/assembler/graphql/generated/root_.generated.go @@ -572,6 +572,7 @@ type ComplexityRoot struct { PkgEqualList func(childComplexity int, pkgEqualSpec model.PkgEqualSpec, after *string, first *int) int PointOfContact func(childComplexity int, pointOfContactSpec model.PointOfContactSpec) int PointOfContactList func(childComplexity int, pointOfContactSpec model.PointOfContactSpec, after *string, first *int) int + QueryPackagesListForScan func(childComplexity int, pkgSpec model.PkgSpec, queryType model.QueryType, lastScan *int, after *string, first *int) int Scorecards func(childComplexity int, scorecardSpec model.CertifyScorecardSpec) int ScorecardsList func(childComplexity int, scorecardSpec model.CertifyScorecardSpec, after *string, first *int) int Sources func(childComplexity int, sourceSpec model.SourceSpec) int @@ -3551,6 +3552,18 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.Query.PointOfContactList(childComplexity, args["pointOfContactSpec"].(model.PointOfContactSpec), args["after"].(*string), args["first"].(*int)), true + case "Query.queryPackagesListForScan": + if e.complexity.Query.QueryPackagesListForScan == nil { + break + } + + args, err := ec.field_Query_queryPackagesListForScan_args(context.TODO(), rawArgs) + if err != nil { + return 0, false + } + + return e.complexity.Query.QueryPackagesListForScan(childComplexity, args["pkgSpec"].(model.PkgSpec), args["queryType"].(model.QueryType), args["lastScan"].(*int), args["after"].(*string), args["first"].(*int)), true + case "Query.scorecards": if e.complexity.Query.Scorecards == nil { break @@ -7574,6 +7587,18 @@ type SoftwareEdge { node: PackageSourceOrArtifact! } +""" +QueryType is used in conjunction with queryPackagesListForScan to +specify if the last time scanned is checked for either certifyVuln +or certifyLegal. 
+""" +enum QueryType { + "check the last scan time against certifyVuln" + VULNERABILITY + "check the last scan time against certifyLegal" + LICENSE +} + extend type Query { """ findSoftware takes in a searchText string and looks for software @@ -7597,6 +7622,16 @@ extend type Query { findSoftware(searchText: String!): [PackageSourceOrArtifact!]! "Returns a paginated results via CertifyBadConnection" findSoftwareList(searchText: String!, after: ID, first: Int): FindSoftwareConnection + + """ + queryPackagesListForScan returns a paginated results via PackageConnection + for all packages that need to be re-scanned (based on the last scan in hours) + or have never been scanned. + + queryType is used to specify if the last time scanned is checked for either + certifyVuln or certifyLegal. + """ + queryPackagesListForScan(pkgSpec: PkgSpec!, queryType: QueryType!, lastScan: Int, after: ID, first: Int): PackageConnection } `, BuiltIn: false}, {Name: "../schema/source.graphql", Input: `# diff --git a/pkg/assembler/graphql/generated/search.generated.go b/pkg/assembler/graphql/generated/search.generated.go index e4f0796d64..ccb10ffb51 100644 --- a/pkg/assembler/graphql/generated/search.generated.go +++ b/pkg/assembler/graphql/generated/search.generated.go @@ -357,6 +357,16 @@ func (ec *executionContext) _SoftwareEdge(ctx context.Context, sel ast.Selection // region ***************************** type.gotpl ***************************** +func (ec *executionContext) unmarshalNQueryType2githubᚗcomᚋguacsecᚋguacᚋpkgᚋassemblerᚋgraphqlᚋmodelᚐQueryType(ctx context.Context, v interface{}) (model.QueryType, error) { + var res model.QueryType + err := res.UnmarshalGQL(v) + return res, graphql.ErrorOnPath(ctx, err) +} + +func (ec *executionContext) marshalNQueryType2githubᚗcomᚋguacsecᚋguacᚋpkgᚋassemblerᚋgraphqlᚋmodelᚐQueryType(ctx context.Context, sel ast.SelectionSet, v model.QueryType) graphql.Marshaler { + return v +} + func (ec *executionContext) marshalNSoftwareEdge2ᚕᚖgithubᚗcomᚋguacsecᚋguacᚋpkgᚋassemblerᚋgraphqlᚋmodelᚐSoftwareEdgeᚄ(ctx 
context.Context, sel ast.SelectionSet, v []*model.SoftwareEdge) graphql.Marshaler { ret := make(graphql.Array, len(v)) var wg sync.WaitGroup diff --git a/pkg/assembler/graphql/model/nodes.go b/pkg/assembler/graphql/model/nodes.go index 78e6b737a1..c401996361 100644 --- a/pkg/assembler/graphql/model/nodes.go +++ b/pkg/assembler/graphql/model/nodes.go @@ -2682,6 +2682,52 @@ func (e PkgMatchType) MarshalGQL(w io.Writer) { fmt.Fprint(w, strconv.Quote(e.String())) } +// QueryType is used in conjunction with queryPackagesListForScan to +// specify if the last time scanned is checked for either certifyVuln +// or certifyLegal. +type QueryType string + +const ( + // check the last scan time against certifyVuln + QueryTypeVulnerability QueryType = "VULNERABILITY" + // check the last scan time against certifyLegal + QueryTypeLicense QueryType = "LICENSE" +) + +var AllQueryType = []QueryType{ + QueryTypeVulnerability, + QueryTypeLicense, +} + +func (e QueryType) IsValid() bool { + switch e { + case QueryTypeVulnerability, QueryTypeLicense: + return true + } + return false +} + +func (e QueryType) String() string { + return string(e) +} + +func (e *QueryType) UnmarshalGQL(v interface{}) error { + str, ok := v.(string) + if !ok { + return fmt.Errorf("enums must be strings") + } + + *e = QueryType(str) + if !e.IsValid() { + return fmt.Errorf("%s is not a valid QueryType", str) + } + return nil +} + +func (e QueryType) MarshalGQL(w io.Writer) { + fmt.Fprint(w, strconv.Quote(e.String())) +} + // Records the justification included in the VEX statement. 
type VexJustification string diff --git a/pkg/assembler/graphql/resolvers/search.resolvers.go b/pkg/assembler/graphql/resolvers/search.resolvers.go index 4ee866b4c9..c328796aa4 100644 --- a/pkg/assembler/graphql/resolvers/search.resolvers.go +++ b/pkg/assembler/graphql/resolvers/search.resolvers.go @@ -19,3 +19,8 @@ func (r *queryResolver) FindSoftware(ctx context.Context, searchText string) ([] func (r *queryResolver) FindSoftwareList(ctx context.Context, searchText string, after *string, first *int) (*model.FindSoftwareConnection, error) { return r.Backend.FindSoftwareList(ctx, searchText, after, first) } + +// QueryPackagesListForScan is the resolver for the queryPackagesListForScan field. +func (r *queryResolver) QueryPackagesListForScan(ctx context.Context, pkgSpec model.PkgSpec, queryType model.QueryType, lastScan *int, after *string, first *int) (*model.PackageConnection, error) { + return r.Backend.QueryPackagesListForScan(ctx, pkgSpec, queryType, lastScan, after, first) +} diff --git a/pkg/assembler/graphql/schema/search.graphql b/pkg/assembler/graphql/schema/search.graphql index 77a874fa01..9d17dec07a 100644 --- a/pkg/assembler/graphql/schema/search.graphql +++ b/pkg/assembler/graphql/schema/search.graphql @@ -41,6 +41,18 @@ type SoftwareEdge { node: PackageSourceOrArtifact! } +""" +QueryType is used in conjunction with queryPackagesListForScan to +specify if the last time scanned is checked for either certifyVuln +or certifyLegal. +""" +enum QueryType { + "check the last scan time against certifyVuln" + VULNERABILITY + "check the last scan time against certifyLegal" + LICENSE +} + extend type Query { """ findSoftware takes in a searchText string and looks for software @@ -64,4 +76,14 @@ extend type Query { findSoftware(searchText: String!): [PackageSourceOrArtifact!]! 
"Returns a paginated results via CertifyBadConnection" findSoftwareList(searchText: String!, after: ID, first: Int): FindSoftwareConnection + + """ + queryPackagesListForScan returns a paginated results via PackageConnection + for all packages that need to be re-scanned (based on the last scan in hours) + or have never been scanned. + + queryType is used to specify if the last time scanned is checked for either + certifyVuln or certifyLegal. + """ + queryPackagesListForScan(pkgSpec: PkgSpec!, queryType: QueryType!, lastScan: Int, after: ID, first: Int): PackageConnection } diff --git a/pkg/certifier/components/root_package/root_package.go b/pkg/certifier/components/root_package/root_package.go index 6a8c3311d2..e1e4d2f163 100644 --- a/pkg/certifier/components/root_package/root_package.go +++ b/pkg/certifier/components/root_package/root_package.go @@ -41,18 +41,22 @@ type packageQuery struct { serviceBatchSize int // add artificial latency to throttle the pagination query addedLatency *time.Duration + lastScan *int + queryType generated.QueryType } -var getPackages func(ctx context.Context, client graphql.Client, filter generated.PkgSpec, after *string, first *int) (*generated.PackagesListResponse, error) +var getPackages func(ctx_ context.Context, client_ graphql.Client, filter generated.PkgSpec, queryType generated.QueryType, lastInterval *int, after *string, first *int) (*generated.QueryPackagesListForScanResponse, error) // NewPackageQuery initializes the packageQuery to query from the graph database -func NewPackageQuery(client graphql.Client, batchSize, serviceBatchSize int, addedLatency *time.Duration) certifier.QueryComponents { - getPackages = generated.PackagesList +func NewPackageQuery(client graphql.Client, queryType generated.QueryType, batchSize, serviceBatchSize int, addedLatency *time.Duration, lastScan *int) certifier.QueryComponents { + getPackages = generated.QueryPackagesListForScan return &packageQuery{ client: client, batchSize: batchSize, 
serviceBatchSize: serviceBatchSize, addedLatency: addedLatency, + lastScan: lastScan, + queryType: queryType, } } @@ -129,14 +133,15 @@ func (p *packageQuery) getPackageNodes(ctx context.Context, nodeChan chan<- *Pac first := p.batchSize for { - pkgConn, err := getPackages(ctx, p.client, generated.PkgSpec{}, afterCursor, &first) + pkgConn, err := getPackages(ctx, p.client, generated.PkgSpec{}, p.queryType, p.lastScan, afterCursor, &first) if err != nil { return fmt.Errorf("failed to query packages with error: %w", err) } - if pkgConn == nil || pkgConn.PackagesList == nil { + + if pkgConn == nil || pkgConn.QueryPackagesListForScan == nil { continue } - pkgEdges := pkgConn.PackagesList.Edges + pkgEdges := pkgConn.QueryPackagesListForScan.Edges for _, pkgNode := range pkgEdges { if pkgNode.Node.Type == guacType { @@ -153,10 +158,10 @@ func (p *packageQuery) getPackageNodes(ctx context.Context, nodeChan chan<- *Pac } } } - if !pkgConn.PackagesList.PageInfo.HasNextPage { + if !pkgConn.QueryPackagesListForScan.PageInfo.HasNextPage { break } - afterCursor = pkgConn.PackagesList.PageInfo.EndCursor + afterCursor = pkgConn.QueryPackagesListForScan.PageInfo.EndCursor // add artificial latency to throttle the pagination query if p.addedLatency != nil { time.Sleep(*p.addedLatency) diff --git a/pkg/certifier/components/root_package/root_package_test.go b/pkg/certifier/components/root_package/root_package_test.go index f08a8d2e91..ca9cfae730 100644 --- a/pkg/certifier/components/root_package/root_package_test.go +++ b/pkg/certifier/components/root_package/root_package_test.go @@ -23,6 +23,7 @@ import ( "time" "github.com/Khan/genqlient/graphql" + "github.com/guacsec/guac/internal/testing/ptrfrom" "github.com/guacsec/guac/pkg/assembler/clients/generated" "github.com/guacsec/guac/pkg/certifier" ) @@ -36,6 +37,8 @@ func TestNewPackageQuery(t *testing.T) { batchSize int serviceBatchSize int addedLatency *time.Duration + lastScan *int + queryType generated.QueryType } tests := 
[]struct { name string @@ -48,17 +51,21 @@ func TestNewPackageQuery(t *testing.T) { batchSize: 60000, serviceBatchSize: 1000, addedLatency: nil, + lastScan: ptrfrom.Int(1), + queryType: generated.QueryTypeVulnerability, }, want: &packageQuery{ client: gqlclient, batchSize: 60000, serviceBatchSize: 1000, addedLatency: nil, + lastScan: ptrfrom.Int(1), + queryType: generated.QueryTypeVulnerability, }, }} for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := NewPackageQuery(tt.args.client, tt.args.batchSize, tt.args.serviceBatchSize, tt.args.addedLatency); !reflect.DeepEqual(got, tt.want) { + if got := NewPackageQuery(tt.args.client, tt.args.queryType, tt.args.batchSize, tt.args.serviceBatchSize, tt.args.addedLatency, tt.args.lastScan); !reflect.DeepEqual(got, tt.want) { t.Errorf("NewPackageQuery() = %v, want %v", got, tt.want) } }) @@ -66,7 +73,7 @@ func TestNewPackageQuery(t *testing.T) { } func Test_packageQuery_GetComponents(t *testing.T) { - testPypiPackage := generated.PackagesListPackagesListPackageConnectionEdgesPackageEdgeNodePackage{} + testPypiPackage := generated.QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage{} testPypiPackage.Type = "pypi" testPypiPackage.Namespaces = append(testPypiPackage.Namespaces, generated.AllPkgTreeNamespacesPackageNamespace{ @@ -85,7 +92,7 @@ func Test_packageQuery_GetComponents(t *testing.T) { }, }) - testOpenSSLPackage := generated.PackagesListPackagesListPackageConnectionEdgesPackageEdgeNodePackage{} + testOpenSSLPackage := generated.QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdgeNodePackage{} testOpenSSLPackage.Type = "conan" testOpenSSLPackage.Namespaces = append(testOpenSSLPackage.Namespaces, generated.AllPkgTreeNamespacesPackageNamespace{ Id: "", @@ -105,23 +112,25 @@ func Test_packageQuery_GetComponents(t *testing.T) { tests := []struct { name string - getPackages func(ctx context.Context, client graphql.Client, filter 
generated.PkgSpec, after *string, first *int) (*generated.PackagesListResponse, error) + lastScan int + getPackages func(ctx_ context.Context, client_ graphql.Client, filter generated.PkgSpec, queryType generated.QueryType, lastInterval *int, after *string, first *int) (*generated.QueryPackagesListForScanResponse, error) wantPackNode []*PackageNode wantErr bool }{ { - name: "django:", - getPackages: func(ctx context.Context, client graphql.Client, filter generated.PkgSpec, after *string, first *int) (*generated.PackagesListResponse, error) { - return &generated.PackagesListResponse{ - PackagesList: &generated.PackagesListPackagesListPackageConnection{ + name: "django:", + lastScan: 0, + getPackages: func(ctx_ context.Context, client_ graphql.Client, filter generated.PkgSpec, queryType generated.QueryType, lastInterval *int, after *string, first *int) (*generated.QueryPackagesListForScanResponse, error) { + return &generated.QueryPackagesListForScanResponse{ + QueryPackagesListForScan: &generated.QueryPackagesListForScanQueryPackagesListForScanPackageConnection{ TotalCount: 1, - Edges: []generated.PackagesListPackagesListPackageConnectionEdgesPackageEdge{ + Edges: []generated.QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdge{ { Node: testPypiPackage, Cursor: "", }, }, - PageInfo: generated.PackagesListPackagesListPackageConnectionPageInfo{ + PageInfo: generated.QueryPackagesListForScanQueryPackagesListForScanPackageConnectionPageInfo{ HasNextPage: false, }, }, @@ -134,12 +143,13 @@ func Test_packageQuery_GetComponents(t *testing.T) { }, wantErr: false, }, { - name: "multiple packages", - getPackages: func(ctx context.Context, client graphql.Client, filter generated.PkgSpec, after *string, first *int) (*generated.PackagesListResponse, error) { - return &generated.PackagesListResponse{ - PackagesList: &generated.PackagesListPackagesListPackageConnection{ - TotalCount: 1, - Edges: 
[]generated.PackagesListPackagesListPackageConnectionEdgesPackageEdge{ + name: "multiple packages", + lastScan: 0, + getPackages: func(ctx_ context.Context, client_ graphql.Client, filter generated.PkgSpec, queryType generated.QueryType, lastInterval *int, after *string, first *int) (*generated.QueryPackagesListForScanResponse, error) { + return &generated.QueryPackagesListForScanResponse{ + QueryPackagesListForScan: &generated.QueryPackagesListForScanQueryPackagesListForScanPackageConnection{ + TotalCount: 2, + Edges: []generated.QueryPackagesListForScanQueryPackagesListForScanPackageConnectionEdgesPackageEdge{ { Node: testPypiPackage, Cursor: "", @@ -149,7 +159,7 @@ func Test_packageQuery_GetComponents(t *testing.T) { Cursor: "", }, }, - PageInfo: generated.PackagesListPackagesListPackageConnectionPageInfo{ + PageInfo: generated.QueryPackagesListForScanQueryPackagesListForScanPackageConnectionPageInfo{ HasNextPage: false, }, }, @@ -170,10 +180,10 @@ func Test_packageQuery_GetComponents(t *testing.T) { t.Run(tt.name, func(t *testing.T) { ctx := context.Background() p := &packageQuery{ - client: nil, - batchSize: 1, - serviceBatchSize: 250, - addedLatency: &addedLatency, + client: nil, + batchSize: 1, + addedLatency: &addedLatency, + queryType: generated.QueryTypeVulnerability, } getPackages = tt.getPackages @@ -192,7 +202,7 @@ func Test_packageQuery_GetComponents(t *testing.T) { select { case d := <-compChan: if component, ok := d.([]*PackageNode); ok { - pnList = component + pnList = append(pnList, component...) } case err := <-errChan: if err != nil { @@ -204,7 +214,7 @@ func Test_packageQuery_GetComponents(t *testing.T) { for len(compChan) > 0 { d := <-compChan if component, ok := d.([]*PackageNode); ok { - pnList = component + pnList = append(pnList, component...) 
} } if !reflect.DeepEqual(pnList, tt.wantPackNode) { diff --git a/pkg/cli/store.go b/pkg/cli/store.go index 0e43fa1387..cf62a3896f 100644 --- a/pkg/cli/store.go +++ b/pkg/cli/store.go @@ -118,6 +118,8 @@ func init() { set.StringP("interval", "i", "5m", "if polling set interval, m, h, s, etc.") + set.IntP("last-scan", "l", 4, "hours since the last scan was run. If not set, run on all packages/sources") + set.BoolP("cert-good", "g", false, "enable to certifyGood, otherwise defaults to certifyBad") set.BoolP("package-name", "n", false, "if type is package, enable if attestation is at package-name level (for all versions), defaults to specific version")