Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(engine): improve similarity id #6851

Merged
merged 19 commits into from
Jan 18, 2024
75 changes: 75 additions & 0 deletions e2e/fixtures/E2E_CLI_078_RESULT.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
{
"kics_version": "development",
"files_scanned": 2,
"lines_scanned": 2,
"files_parsed": 2,
"lines_parsed": 2,
"lines_ignored": 0,
"files_failed_to_scan": 0,
"queries_total": 2,
"queries_failed_to_execute": 0,
"queries_failed_to_compute_similarity_id": 0,
"scan_id": "console",
"severity_counters": {
"HIGH": 0,
"INFO": 0,
"LOW": 0,
"MEDIUM": 3,
"TRACE": 0
},
"total_counter": 3,
"total_bom_resources": 0,
"start": "2024-01-15T11:47:59.44764Z",
"end": "2024-01-15T11:48:03.7408356Z",
"paths": [
"/path/test/fixtures/minified_files_similarity_id"
],
"queries": [
{
"query_name": "Pattern Undefined (v2)",
"query_id": "afde15cf-9444-4126-8c62-41cd79db1d1d",
"query_url": "https://swagger.io/specification/v2/#schemaObject",
"severity": "MEDIUM",
"platform": "OpenAPI",
"category": "Insecure Configurations",
"experimental": false,
"description": "String schema/parameter/header should have 'pattern' defined.",
"description_id": "16f07413",
"files": [
{
"file_name": "path\\test\\fixtures\\minified_files_similarity_id\\e2e\\swagger_1.json",
"similarity_id": "8dbfdf18ee8ceaf7ea11cf6384e650dae90568011b1241c8292c062b670b1cec",
"line": 1,
"issue_type": "MissingAttribute",
"search_key": "paths.{{/api/Document/GetAllLibraries}}.get.parameters.type",
"search_line": 1,
"search_value": "",
"expected_value": "'pattern' should be defined",
"actual_value": "'pattern' is undefined"
},
{
"file_name": "path\\test\\fixtures\\minified_files_similarity_id\\e2e\\swagger_1.json",
"similarity_id": "4ab2fd71196c89db11abe72cbc8a5bb461ee708820c01c663c179212609b214c",
"line": 1,
"issue_type": "MissingAttribute",
"search_key": "paths.{{/api/Document/GetDocumentDownload/{documentId}}}.get.parameters.type",
"search_line": 1,
"search_value": "",
"expected_value": "'pattern' should be defined",
"actual_value": "'pattern' is undefined"
},
{
"file_name": "path\\test\\fixtures\\minified_files_similarity_id\\e2e\\swagger_1.json",
"similarity_id": "b7513d6ba4ddf6fde4f5fa5818423785826a8eb3b9930c502215aaf272eb9bd8",
"line": 1,
"issue_type": "MissingAttribute",
"search_key": "paths.{{/api/Document/GetDocumentsInLibrary}}.get.parameters.type",
"search_line": 1,
"search_value": "",
"expected_value": "'pattern' should be defined",
"actual_value": "'pattern' is undefined"
}
]
}
]
}
27 changes: 27 additions & 0 deletions e2e/testcases/e2e-cli-078_similarity_id_minified_files.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package testcases

// E2E-CLI-078 - KICS scan
// should perform a scan and return three different similarity ids on the results
func init() { //nolint
testSample := TestCase{
Name: "should perform a scan and return three different similarity ids on the results [E2E-CLI-078]",
Args: args{
Args: []cmdArgs{
[]string{"scan", "-o", "/path/e2e/output",
"--output-name", "E2E_CLI_078_RESULT",
"-p", "\"/path/test/fixtures/minified_files_similarity_id\"",
"-i", "00b78adf-b83f-419c-8ed8-c6018441dd3a",
},
},
ExpectedResult: []ResultsValidation{
{
ResultsFile: "E2E_CLI_078_RESULT",
ResultsFormats: []string{"json"},
},
},
},
WantStatus: []int{40},
}

Tests = append(Tests, testSample)
}
33 changes: 28 additions & 5 deletions pkg/engine/vulnerability_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,10 @@ func modifyVulSearchKeyReference(doc interface{}, originalSearchKey string, stri
}

// DefaultVulnerabilityBuilder defines a vulnerability builder to execute default actions of scan
var DefaultVulnerabilityBuilder = func(ctx *QueryContext, tracker Tracker,
v interface{}, detector *dec.DetectLine) (*model.Vulnerability, error) {
var DefaultVulnerabilityBuilder = func(ctx *QueryContext,
tracker Tracker,
v interface{},
detector *dec.DetectLine) (*model.Vulnerability, error) {
vObj, ok := v.(map[string]interface{})
if !ok {
return &model.Vulnerability{}, ErrInvalidResult
Expand Down Expand Up @@ -156,10 +158,8 @@ var DefaultVulnerabilityBuilder = func(ctx *QueryContext, tracker Tracker,
if v := mustMapKeyToString(vObj, "issueType"); v != nil {
issueType = model.IssueType(*v)
}
similarityID, err := buildSimilarityID(ctx, linesVulne.ResolvedFile, queryID, searchKey, similarityIDLineInfo, searchValue)

var similarityID *string

similarityID, err = similarity.ComputeSimilarityID(ctx.BaseScanPaths, linesVulne.ResolvedFile, queryID, similarityIDLineInfo, searchValue)
if err != nil {
logWithFields.Err(err).Send()
tracker.FailedComputeSimilarityID()
Expand Down Expand Up @@ -200,6 +200,29 @@ var DefaultVulnerabilityBuilder = func(ctx *QueryContext, tracker Tracker,
}, nil
}

func buildSimilarityID(
ctx *QueryContext,
resolvedFile,
queryID,
searchKey,
similarityIDLineInfo,
searchValue string) (*string, error) {
if checkMinified(ctx, resolvedFile) {
return similarity.ComputeSimilarityID(ctx.BaseScanPaths, resolvedFile, queryID, searchKey, searchValue)
} else {
return similarity.ComputeSimilarityID(ctx.BaseScanPaths, resolvedFile, queryID, similarityIDLineInfo, searchValue)
}
}

func checkMinified(ctx *QueryContext, resolvedFile string) bool {
for i := range ctx.Files {
if ctx.Files[i].FilePath == resolvedFile {
return ctx.Files[i].IsMinified
}
}
return false
}

func getCloudProvider(platform, overrideKey string, vObj map[string]interface{}, logWithFields *zerolog.Logger) string {
cloudProvider := ""
if platform == "Terraform" || platform == "CloudFormation" || platform == "Ansible" {
Expand Down
5 changes: 4 additions & 1 deletion pkg/kics/resolver_sink.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"sort"

sentryReport "github.com/Checkmarx/kics/internal/sentry"
"github.com/Checkmarx/kics/pkg/minified"
"github.com/Checkmarx/kics/pkg/model"
"github.com/Checkmarx/kics/pkg/utils"
"github.com/google/uuid"
Expand All @@ -30,7 +31,8 @@ func (s *Service) resolverSink(ctx context.Context, filename, scanID string, ope
countLines := bytes.Count(rfile.Content, []byte{'\n'}) + 1
s.Tracker.TrackFileFoundCountLines(countLines)

documents, err := s.Parser.Parse(rfile.FileName, rfile.Content, openAPIResolveReferences)
isMinified := minified.IsMinified(rfile.FileName, rfile.Content)
documents, err := s.Parser.Parse(rfile.FileName, rfile.Content, openAPIResolveReferences, isMinified)
if err != nil {
if documents.Kind == "break" {
return []string{}, nil
Expand Down Expand Up @@ -69,6 +71,7 @@ func (s *Service) resolverSink(ctx context.Context, filename, scanID string, ope
LinesIgnore: documents.IgnoreLines,
ResolvedFiles: documents.ResolvedFiles,
LinesOriginalData: utils.SplitLines(string(rfile.OriginalData)),
IsMinified: documents.IsMinified,
}
s.saveToFile(ctx, &file)
}
Expand Down
5 changes: 4 additions & 1 deletion pkg/kics/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/Checkmarx/kics/pkg/engine"
"github.com/Checkmarx/kics/pkg/engine/provider"
"github.com/Checkmarx/kics/pkg/engine/secrets"
"github.com/Checkmarx/kics/pkg/minified"
"github.com/Checkmarx/kics/pkg/model"
"github.com/Checkmarx/kics/pkg/parser"
"github.com/Checkmarx/kics/pkg/resolver"
Expand Down Expand Up @@ -128,13 +129,14 @@ func (s *Service) StartScan(
type Content struct {
Content *[]byte
CountLines int
IsMinified bool
}

/*
getContent will read the passed file 1MB at a time
to prevent resource exhaustion and return its content
*/
func getContent(rc io.Reader, data []byte, maxSizeMB int) (*Content, error) {
func getContent(rc io.Reader, data []byte, maxSizeMB int, filename string) (*Content, error) {
var content []byte
countLines := 0

Expand Down Expand Up @@ -162,6 +164,7 @@ func getContent(rc io.Reader, data []byte, maxSizeMB int) (*Content, error) {
c.Content = &content
c.CountLines = countLines

c.IsMinified = minified.IsMinified(filename, content)
return c, nil
}

Expand Down
5 changes: 3 additions & 2 deletions pkg/kics/sink.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func (s *Service) sink(ctx context.Context, filename, scanID string,
s.Tracker.TrackFileFound()
log.Debug().Msgf("Starting to process file %s", filename)

c, err := getContent(rc, data, s.MaxFileSize)
c, err := getContent(rc, data, s.MaxFileSize, filename)

*c.Content = resolveCRLFFile(*c.Content)
content := c.Content
Expand All @@ -42,7 +42,7 @@ func (s *Service) sink(ctx context.Context, filename, scanID string,
if err != nil {
return errors.Wrapf(err, "failed to get file content: %s", filename)
}
documents, err := s.Parser.Parse(filename, *content, openAPIResolveReferences)
documents, err := s.Parser.Parse(filename, *content, openAPIResolveReferences, c.IsMinified)
if err != nil {
log.Err(err).Msgf("failed to parse file content: %s", filename)
return nil
Expand Down Expand Up @@ -87,6 +87,7 @@ func (s *Service) sink(ctx context.Context, filename, scanID string,
LinesIgnore: documents.IgnoreLines,
ResolvedFiles: documents.ResolvedFiles,
LinesOriginalData: utils.SplitLines(documents.Content),
IsMinified: documents.IsMinified,
}

s.saveToFile(ctx, &file)
Expand Down
35 changes: 35 additions & 0 deletions pkg/minified/minified.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package minified

import (
"regexp"
"strings"
)

func IsMinified(filename string, content []byte) bool {
if strings.HasSuffix(filename, ".json") {
return isMinifiedJSON(string(content))
} else if strings.HasSuffix(filename, ".yaml") || strings.HasSuffix(filename, ".yml") {
return isMinifiedYAML(string(content))
}
return false
}

func isMinifiedJSON(content string) bool {
// Define a regular expression to match common minification patterns
minifiedPattern := regexp.MustCompile(`\s+`)

// Count the number of non-whitespace characters
nonWhitespaceCount := len(minifiedPattern.ReplaceAllString(content, ""))

// 80% of non-whitespace characters
minifiedThreshold := 0.8

return float64(nonWhitespaceCount)/float64(len(content)) > minifiedThreshold
}

// minification is not a standard practice in yaml
// heuristic, check for newline followed by whitespace
func isMinifiedYAML(content string) bool {
// Check for lack of indentation
return !strings.Contains(content, "\n ")
}
59 changes: 59 additions & 0 deletions pkg/minified/minified_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package minified

import (
"github.com/stretchr/testify/assert"
"os"
"testing"
)

func Test_IsMinified(t *testing.T) {
giantMinifiedJson, _ := os.ReadFile("../../test/fixtures/test_minified/giantminified.json")
tests := []struct {
name string
nameFile string
args []byte
want bool
}{
{
name: "Mini minified file json",
nameFile: "test.json",
want: true,
args: []byte("{\"swagger\":\"2.0\",\"info\":{\"version\":\"v1\",\"title\":\"CCBCC.LAUNCHPAD.WebApi\"},\"host\":\"apiapp-dev-lpd.azurewebsites.net\",\"schemes\":[\"https\"],\"paths\":{\"/api/BlobFileDownload\":{\"get\":{\"tags\":[\"BlobFileDownload\"],\"operationId\":\"BlobFileDownload_GetBlobFileDownload\",\"consumes\":[],\"produces\":[\"application/json\",\"text/json\",\"application/xml\",\"text/xml\"],\"responses\":{\"200\":{\"description\":\"OK\",\"schema\":{\"type\":\"object\"}}}}},\"/api/BlobFileDownload/{id}\":{\"get\":{\"tags\":[\"BlobFileDownload\"],\"operationId\":\"BlobFileDownload_Get\",\"consumes\":[],\"produces\":[\"application/json\",\"text/json\",\"application/xml\",\"text/xml\"],\"parameters\":[{\"name\":\"id\",\"in\":\"path\",\"required\":true,\"type\":\"integer\",\"format\":\"int32\"}],\"responses\":{\"200\":{\"description\":\"OK\",\"schema\":{\"type\":\"object\"}}}}}},\"definitions\":{}}"),
},
{
name: "Huge minified file json",
nameFile: "test.json",
want: true,
args: giantMinifiedJson,
},
{
name: "File not json not yaml",
nameFile: "test.tf",
want: false,
args: []byte(""),
},
{
name: "Mini minified file yaml",
nameFile: "test.yml",
want: true,
args: []byte("[{name: my_elb_application, community.aws.elb_application_lb: {name: myelb, security_groups: [sg-12345678, my-sec-group], subnets: [subnet-012345678, subnet-abcdef000], listeners: [{Protocol: HTTP, Port: 80, SslPolicy: ELBSecurityPolicy-2015-05, Certificates: [{CertificateArn: 'arn:aws:iam::12345678987:server-certificate/test.domain.com'}], DefaultActions: [{Type: forward, TargetGroupName: targetname}]}], state: present}}, {name: my_elb_application2, community.aws.elb_application_lb: {name: myelb2, security_groups: [sg-12345678, my-sec-group], subnets: [subnet-012345678, subnet-abcdef000], listeners: {Port: 80, SslPolicy: ELBSecurityPolicy-2015-05, Certificates: [{CertificateArn: 'arn:aws:iam::12345678987:server-certificate/test.domain.com'}], DefaultActions: [{Type: forward, TargetGroupName: targetname}]}, state: present}}]"),
},
{
name: "Not minified file yaml",
nameFile: "test.yml",
want: false,
args: []byte("- name: my_elb_application\n community.aws.elb_application_lb:\n name: myelb\n security_groups:\n - sg-12345678\n - my-sec-group\n subnets:\n - subnet-012345678\n - subnet-abcdef000\n listeners:\n - Protocol: HTTP\n Port: 80\n SslPolicy: ELBSecurityPolicy-2015-05\n Certificates:\n - CertificateArn: arn:aws:iam::12345678987:server-certificate/test.domain.com\n DefaultActions:\n - Type: forward\n TargetGroupName: targetname\n state: present\n- name: my_elb_application2\n community.aws.elb_application_lb:\n name: myelb2\n security_groups:\n - sg-12345678\n - my-sec-group\n subnets:\n - subnet-012345678\n - subnet-abcdef000\n listeners:\n Port: 80\n SslPolicy: ELBSecurityPolicy-2015-05\n Certificates:\n - CertificateArn: arn:aws:iam::12345678987:server-certificate/test.domain.com\n DefaultActions:\n - Type: forward\n TargetGroupName: targetname\n state: present\n"),
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := IsMinified(tt.nameFile, tt.args)
if tt.want {
assert.True(t, result, tt.name)
} else {
assert.False(t, result, tt.name)
}
})
}
}
1 change: 1 addition & 0 deletions pkg/model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ type FileMetadata struct {
LinesIgnore []int
ResolvedFiles map[string]ResolvedFile
LinesOriginalData *[]string
IsMinified bool
}

// QueryMetadata is a representation of general information about a query
Expand Down
4 changes: 3 additions & 1 deletion pkg/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ type ParsedDocument struct {
IgnoreLines []int
CountLines int
ResolvedFiles map[string]model.ResolvedFile
IsMinified bool
}

// CommentsCommands gets commands on comments in the file beginning, before the code starts
Expand Down Expand Up @@ -117,7 +118,7 @@ func (c *Parser) CommentsCommands(filePath string, fileContent []byte) model.Com

// Parse executes a parser on the fileContent and returns the file content as a Document, the file kind and
// an error, if an error has occurred
func (c *Parser) Parse(filePath string, fileContent []byte, openAPIResolveReferences bool) (ParsedDocument, error) {
func (c *Parser) Parse(filePath string, fileContent []byte, openAPIResolveReferences, isMinified bool) (ParsedDocument, error) {
fileContent = utils.DecryptAnsibleVault(fileContent, os.Getenv("ANSIBLE_VAULT_PASSWORD_FILE"))

if c.isValidExtension(filePath) {
Expand All @@ -143,6 +144,7 @@ func (c *Parser) Parse(filePath string, fileContent []byte, openAPIResolveRefere
IgnoreLines: igLines,
CountLines: bytes.Count(resolved, []byte{'\n'}) + 1,
ResolvedFiles: c.parsers.GetResolvedFiles(),
IsMinified: isMinified,
}, nil
}
return ParsedDocument{
Expand Down
Loading
Loading