From ef5c1651efb5b4be5a6083aac35ea8781b00a2d3 Mon Sep 17 00:00:00 2001 From: Keith Zantow Date: Wed, 29 Nov 2023 12:51:24 -0500 Subject: [PATCH] fix: improve dotnet portable executable identification (#2133) Signed-off-by: Keith Zantow --- .../parse_dotnet_portable_executable.go | 151 ++++++++++--- .../parse_dotnet_portable_executable_test.go | 212 ++++++++++++++++-- 2 files changed, 318 insertions(+), 45 deletions(-) diff --git a/syft/pkg/cataloger/dotnet/parse_dotnet_portable_executable.go b/syft/pkg/cataloger/dotnet/parse_dotnet_portable_executable.go index 3dc6a96820b..d3a782d587d 100644 --- a/syft/pkg/cataloger/dotnet/parse_dotnet_portable_executable.go +++ b/syft/pkg/cataloger/dotnet/parse_dotnet_portable_executable.go @@ -58,23 +58,14 @@ func parseDotnetPortableExecutable(_ file.Resolver, _ *generic.Environment, f fi func buildDotNetPackage(versionResources map[string]string, f file.LocationReadCloser) (dnpkg pkg.Package, err error) { name := findName(versionResources) if name == "" { - return dnpkg, fmt.Errorf("unable to find FileDescription, or ProductName in PE file: %s", f.RealPath) + return dnpkg, fmt.Errorf("unable to find PE name in file: %s", f.RealPath) } version := findVersion(versionResources) - if strings.TrimSpace(version) == "" { - return dnpkg, fmt.Errorf("unable to find FileVersion in PE file: %s", f.RealPath) + if version == "" { + return dnpkg, fmt.Errorf("unable to find PE version in file: %s", f.RealPath) } - purl := packageurl.NewPackageURL( - packageurl.TypeNuget, // See explanation in syft/pkg/cataloger/dotnet/package.go as to why this was chosen. - "", - name, - version, - nil, - "", - ).ToString() - metadata := pkg.DotnetPortableExecutableEntry{ AssemblyVersion: versionResources["Assembly Version"], LegalCopyright: versionResources["LegalCopyright"], @@ -91,7 +82,7 @@ func buildDotNetPackage(versionResources map[string]string, f file.LocationReadC Locations: file.NewLocationSet(f.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)), Type: pkg.DotnetPkg, Language: pkg.Dotnet, - PURL: purl, + PURL: portableExecutablePackageURL(name, version), Metadata: metadata, } @@ -100,30 +91,128 @@ func buildDotNetPackage(versionResources map[string]string, f file.LocationReadC return dnpkg, nil } -func findVersion(versionResources map[string]string) string { - for _, key := range []string{"FileVersion"} { - if version, ok := versionResources[key]; ok { - if strings.TrimSpace(version) == "" { - continue - } - fields := strings.Fields(version) - if len(fields) > 0 { - return fields[0] - } +func portableExecutablePackageURL(name, version string) string { + return packageurl.NewPackageURL( + packageurl.TypeNuget, // See explanation in syft/pkg/cataloger/dotnet/package.go as to why this was chosen. + "", + name, + version, + nil, + "", + ).ToString() +} + +func extractVersion(version string) string { + version = strings.TrimSpace(version) + + out := "" + + // some example versions are: "1, 0, 0, 0", "Release 73" or "4.7.4076.0 built by: NET472REL1LAST_B" + // so try to split it and take the first parts that look numeric + for i, f := range strings.Fields(version) { + // if the output already has a number but the current segment does not have a number, + // return what we found for the version + if containsNumber(out) && !containsNumber(f) { + return out + } + + if i == 0 { + out = f + } else { + out += " " + f } } - return "" + + return out } +func findVersion(versionResources map[string]string) string { + productVersion := extractVersion(versionResources["ProductVersion"]) + fileVersion := extractVersion(versionResources["FileVersion"]) + + if productVersion == "" { + return fileVersion + } + + productVersionDetail := punctuationCount(productVersion) + fileVersionDetail := punctuationCount(fileVersion) + + if containsNumber(productVersion) && productVersionDetail >= fileVersionDetail { + return productVersion + } + + if containsNumber(fileVersion) && fileVersionDetail > 0 { + return fileVersion + } + + if containsNumber(productVersion) { + return productVersion + } + + if containsNumber(fileVersion) { + return fileVersion + } + + return productVersion +} + +func containsNumber(s string) bool { + return numberRegex.MatchString(s) +} + +func punctuationCount(s string) int { + return len(versionPunctuationRegex.FindAllString(s, -1)) +} + +var ( + // spaceRegex includes nbsp (#160) considered to be a space character + spaceRegex = regexp.MustCompile(`[\s\xa0]+`) + numberRegex = regexp.MustCompile(`\d`) + versionPunctuationRegex = regexp.MustCompile(`[.,]+`) +) + func findName(versionResources map[string]string) string { - for _, key := range []string{"FileDescription", "ProductName"} { - if name, ok := versionResources[key]; ok { - if strings.TrimSpace(name) == "" { - continue - } - trimmed := strings.TrimSpace(name) - return regexp.MustCompile(`[^a-zA-Z0-9.]+`).ReplaceAllString(trimmed, "") + // PE files found in the wild _not_ authored by Microsoft seem to use ProductName as a clear + // identifier of the software + nameFields := []string{"ProductName", "FileDescription", "InternalName", "OriginalFilename"} + + if isMicrosoft(versionResources) { + // Microsoft seems to be consistent using the FileDescription, with a few that are blank and have + // fallbacks to ProductName last, as this is often something very broad like "Microsoft Windows" + nameFields = []string{"FileDescription", "InternalName", "OriginalFilename", "ProductName"} + } + + for _, field := range nameFields { + value := spaceNormalize(versionResources[field]) + if value == "" { + continue } + return value } + return "" } + +// normalizes a string to a trimmed version with all contigous whitespace collapsed to a single space character +func spaceNormalize(value string) string { + value = strings.TrimSpace(value) + if value == "" { + return "" + } + // ensure valid utf8 text + value = strings.ToValidUTF8(value, "") + // consolidate all space characters + value = spaceRegex.ReplaceAllString(value, " ") + // remove other non-space, non-printable characters + value = regexp.MustCompile(`[\x00-\x1f]`).ReplaceAllString(value, "") + // consolidate all space characters again in case other non-printables were in-between + value = spaceRegex.ReplaceAllString(value, " ") + // finally, remove any remaining surrounding whitespace + value = strings.TrimSpace(value) + return value +} + +func isMicrosoft(versionResources map[string]string) bool { + return strings.Contains(strings.ToLower(versionResources["CompanyName"]), "microsoft") || + strings.Contains(strings.ToLower(versionResources["ProductName"]), "microsoft") +} diff --git a/syft/pkg/cataloger/dotnet/parse_dotnet_portable_executable_test.go b/syft/pkg/cataloger/dotnet/parse_dotnet_portable_executable_test.go index 84daa4f4e73..d0347052943 100644 --- a/syft/pkg/cataloger/dotnet/parse_dotnet_portable_executable_test.go +++ b/syft/pkg/cataloger/dotnet/parse_dotnet_portable_executable_test.go @@ -32,12 +32,8 @@ func TestParseDotnetPortableExecutable(t *testing.T) { "Assembly Version": "3.14.2.11", }, expectedPackage: pkg.Package{ - Name: "ActiveDirectoryAuthenticationLibrary", - Version: "3.14.40721.0918", - Locations: file.NewLocationSet(file.NewLocation("").WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)), - Type: pkg.DotnetPkg, - Language: pkg.Dotnet, - PURL: "pkg:nuget/ActiveDirectoryAuthenticationLibrary@3.14.40721.0918", + Name: "Active Directory Authentication Library", + Version: "3.14.40721.0918", Metadata: pkg.DotnetPortableExecutableEntry{ AssemblyVersion: "3.14.2.11", LegalCopyright: "Copyright (c) Microsoft Corporation. All rights reserved.", @@ -52,7 +48,7 @@ func TestParseDotnetPortableExecutable(t *testing.T) { name: "dotnet package with malformed field and extended version", versionResources: map[string]string{ "CompanyName": "Microsoft Corporation", - "FileDescription": "äbFileVersion", + "FileDescription": "äbFile\xa0\xa1Versi on", "FileVersion": "4.6.25512.01 built by: dlab-DDVSOWINAGE016. Commit Hash: d0d5c7b49271cadb6d97de26d8e623e98abdc8db", "InternalName": "äbFileVersion", "LegalCopyright": "© Microsoft Corporation. All rights reserved.", @@ -61,13 +57,9 @@ func TestParseDotnetPortableExecutable(t *testing.T) { "ProductVersion": "4.6.25512.01 built by: dlab-DDVSOWINAGE016. Commit Hash: d0d5c7b49271cadb6d97de26d8e623e98abdc8db", }, expectedPackage: pkg.Package{ - Name: "bFileVersion", + Name: "äbFileVersi on", Version: "4.6.25512.01", - Locations: file.NewLocationSet( - file.NewLocation("").WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)), - Type: pkg.DotnetPkg, - Language: pkg.Dotnet, - PURL: "pkg:nuget/bFileVersion@4.6.25512.01", + PURL: "pkg:nuget/%C3%A4bFileVersi%20on@4.6.25512.01", Metadata: pkg.DotnetPortableExecutableEntry{ LegalCopyright: "© Microsoft Corporation. All rights reserved.", InternalName: "äb\x01FileVersion", @@ -77,16 +69,208 @@ func TestParseDotnetPortableExecutable(t *testing.T) { }, }, }, + { + name: "System.Data.Linq.dll", + versionResources: map[string]string{ + "CompanyName": "Microsoft Corporation", + "FileDescription": "System.Data.Linq.dll", + "FileVersion": "4.7.3190.0 built by: NET472REL1LAST_C", + "InternalName": "System.Data.Linq.dll", + "LegalCopyright": "© Microsoft Corporation. All rights reserved.", + "OriginalFilename": "System.Data.Linq.dll", + "ProductName": "Microsoft® .NET Framework", + "ProductVersion": "4.7.3190.0", + }, + expectedPackage: pkg.Package{ + Name: "System.Data.Linq.dll", + Version: "4.7.3190.0", + }, + }, + { + name: "curl", + versionResources: map[string]string{ + "CompanyName": "curl, https://curl.se/", + "FileDescription": "The curl executable", + "FileVersion": "8.4.0", + "InternalName": "curl", + "LegalCopyright": "© Daniel Stenberg, .", + "OriginalFilename": "curl.exe", + "ProductName": "The curl executable", + "ProductVersion": "8.4.0", + }, + expectedPackage: pkg.Package{ + Name: "The curl executable", + Version: "8.4.0", + }, + }, + { + name: "Prometheus", + versionResources: map[string]string{ + "AssemblyVersion": "8.0.0.0", + "CompanyName": "", + "FileDescription": "", + "FileVersion": "8.0.1", + "InternalName": "Prometheus.AspNetCore.dll", + "OriginalFilename": "Prometheus.AspNetCore.dll", + "ProductName": "", + "ProductVersion": "8.0.1", + }, + expectedPackage: pkg.Package{ + Name: "Prometheus.AspNetCore.dll", + Version: "8.0.1", + }, + }, + { + name: "Hidden Input", + versionResources: map[string]string{ + "FileDescription": "Reads from stdin without leaking info to the terminal and outputs back to stdout", + "FileVersion": "1, 0, 0, 0", + "InternalName": "hiddeninput", + "LegalCopyright": "Jordi Boggiano - 2012", + "OriginalFilename": "hiddeninput.exe", + "ProductName": "Hidden Input", + "ProductVersion": "1, 0, 0, 0", + }, + expectedPackage: pkg.Package{ + Name: "Hidden Input", + Version: "1, 0, 0, 0", + }, + }, + { + name: "SQLite3", + versionResources: map[string]string{ + "CompanyName": "SQLite Development Team", + "FileDescription": "SQLite is a software library that implements a self-contained, serverless, zero-configuration, transactional SQL database engine.", + "FileVersion": "3.23.2", + "InternalName": "sqlite3", + "LegalCopyright": "http://www.sqlite.org/copyright.html", + "ProductName": "SQLite", + "ProductVersion": "3.23.2", + }, + expectedPackage: pkg.Package{ + Name: "SQLite", + Version: "3.23.2", + }, + }, + { + name: "Brave Browser", + versionResources: map[string]string{ + "CompanyName": "Brave Software, Inc.", + "FileDescription": "Brave Browser", + "FileVersion": "80.1.7.92", + "InternalName": "chrome_exe", + "LegalCopyright": "Copyright 2016 The Brave Authors. All rights reserved.", + "OriginalFilename": "chrome.exe", + "ProductName": "Brave Browser", + "ProductVersion": "80.1.7.92", + }, + expectedPackage: pkg.Package{ + Name: "Brave Browser", + Version: "80.1.7.92", + }, + }, + { + name: "Better product version", + versionResources: map[string]string{ + "FileDescription": "Better version", + "FileVersion": "80.1.7", + "ProductVersion": "80.1.7.92", + }, + expectedPackage: pkg.Package{ + Name: "Better version", + Version: "80.1.7.92", + }, + }, + { + name: "Better file version", + versionResources: map[string]string{ + "FileDescription": "Better version", + "FileVersion": "80.1.7.92", + "ProductVersion": "80.1.7", + }, + expectedPackage: pkg.Package{ + Name: "Better version", + Version: "80.1.7.92", + }, + }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { + location := file.NewLocation("") f := file.LocationReadCloser{ - Location: file.NewLocation(""), + Location: location, } got, err := buildDotNetPackage(tc.versionResources, f) assert.NoErrorf(t, err, "failed to build package from version resources: %+v", tc.versionResources) + + // ignore certain metadata + if tc.expectedPackage.Metadata == nil { + got.Metadata = nil + } + // set known defaults + if tc.expectedPackage.Type == "" { + tc.expectedPackage.Type = pkg.DotnetPkg + } + if tc.expectedPackage.Language == "" { + tc.expectedPackage.Language = pkg.Dotnet + } + if tc.expectedPackage.PURL == "" { + tc.expectedPackage.PURL = portableExecutablePackageURL(tc.expectedPackage.Name, tc.expectedPackage.Version) + } + tc.expectedPackage.Locations = file.NewLocationSet(location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)) + pkgtest.AssertPackagesEqual(t, tc.expectedPackage, got) }) } } + +func Test_extractVersion(t *testing.T) { + tests := []struct { + input string + expected string + }{ + { + input: "1, 0, 0, 0", + expected: "1, 0, 0, 0", + }, + { + input: "Release 73", + expected: "Release 73", + }, + { + input: "4.7.4076.0 built by: NET472REL1LAST_B", + expected: "4.7.4076.0", + }, + } + + for _, test := range tests { + t.Run(test.input, func(t *testing.T) { + got := extractVersion(test.input) + assert.Equal(t, test.expected, got) + }) + } +} + +func Test_spaceNormalize(t *testing.T) { + tests := []struct { + input string + expected string + }{ + { + expected: "some spaces apart", + input: " some spaces\n\t\t \n\rapart\n", + }, + { + expected: "söme ¡nvalid characters", + input: "\rsöme \u0001¡nvalid\t characters\n", + }, + } + + for _, test := range tests { + t.Run(test.expected, func(t *testing.T) { + got := spaceNormalize(test.input) + assert.Equal(t, test.expected, got) + }) + } +}