Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: improve dotnet portable executable identification #2133

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 116 additions & 31 deletions syft/pkg/cataloger/dotnet/parse_dotnet_portable_executable.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,23 +58,14 @@ func parseDotnetPortableExecutable(_ file.Resolver, _ *generic.Environment, f fi
func buildDotNetPackage(versionResources map[string]string, f file.LocationReadCloser) (dnpkg pkg.Package, err error) {
name := findName(versionResources)
if name == "" {
return dnpkg, fmt.Errorf("unable to find FileDescription, or ProductName in PE file: %s", f.RealPath)
return dnpkg, fmt.Errorf("unable to find PE name in file: %s", f.RealPath)
}

version := findVersion(versionResources)
if strings.TrimSpace(version) == "" {
return dnpkg, fmt.Errorf("unable to find FileVersion in PE file: %s", f.RealPath)
if version == "" {
return dnpkg, fmt.Errorf("unable to find PE version in file: %s", f.RealPath)
}

purl := packageurl.NewPackageURL(
packageurl.TypeNuget, // See explanation in syft/pkg/cataloger/dotnet/package.go as to why this was chosen.
"",
name,
version,
nil,
"",
).ToString()

metadata := pkg.DotnetPortableExecutableEntry{
AssemblyVersion: versionResources["Assembly Version"],
LegalCopyright: versionResources["LegalCopyright"],
Expand All @@ -91,7 +82,7 @@ func buildDotNetPackage(versionResources map[string]string, f file.LocationReadC
Locations: file.NewLocationSet(f.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
Type: pkg.DotnetPkg,
Language: pkg.Dotnet,
PURL: purl,
PURL: portableExecutablePackageURL(name, version),
Metadata: metadata,
}

Expand All @@ -100,30 +91,124 @@ func buildDotNetPackage(versionResources map[string]string, f file.LocationReadC
return dnpkg, nil
}

func findVersion(versionResources map[string]string) string {
for _, key := range []string{"FileVersion"} {
if version, ok := versionResources[key]; ok {
if strings.TrimSpace(version) == "" {
continue
}
fields := strings.Fields(version)
if len(fields) > 0 {
return fields[0]
}
// portableExecutablePackageURL renders the package URL string for a portable executable with the
// given name and version. Only the type, name and version are populated; the remaining arguments
// are passed as empty strings / nil.
func portableExecutablePackageURL(name, version string) string {
	// See explanation in syft/pkg/cataloger/dotnet/package.go as to why the nuget type was chosen.
	purl := packageurl.NewPackageURL(packageurl.TypeNuget, "", name, version, nil, "")
	return purl.ToString()
}

func extractVersion(version string) string {
version = strings.TrimSpace(version)

out := ""

// some example versions are: "1, 0, 0, 0", "Release 73" or "4.7.4076.0 built by: NET472REL1LAST_B"
// so try to split it and take the first parts that look numeric
for i, f := range strings.Fields(version) {
// if the output already has a number but the current segment does not have a number,
// return what we found for the version
if containsNumber(out) && !containsNumber(f) {
return out
}

if i == 0 {
// out will ge
kzantow marked this conversation as resolved.
Show resolved Hide resolved
out = f
} else {
out += " " + f
}
}
return ""

return out
}

// findVersion selects the version string to report from the "ProductVersion" and "FileVersion"
// version resources, after passing each through extractVersion.
//
// Preference order:
//  1. FileVersion, when ProductVersion is empty
//  2. ProductVersion, when it contains both a digit and a dot
//  3. FileVersion, when it contains both a digit and a dot
//  4. ProductVersion, when it contains a digit
//  5. FileVersion, when it contains a digit
//  6. ProductVersion otherwise
func findVersion(versionResources map[string]string) string {
	fromProduct := extractVersion(versionResources["ProductVersion"])
	fromFile := extractVersion(versionResources["FileVersion"])

	switch {
	case fromProduct == "":
		// nothing usable in ProductVersion: FileVersion is the only candidate (possibly empty too)
		return fromFile
	case containsNumber(fromProduct) && containsDot(fromProduct):
		return fromProduct
	case containsNumber(fromFile) && containsDot(fromFile):
		return fromFile
	case containsNumber(fromProduct):
		return fromProduct
	case containsNumber(fromFile):
		return fromFile
	default:
		// neither candidate has a digit; fall back to the non-empty ProductVersion
		return fromProduct
	}
}

// containsNumber reports whether out holds at least one ASCII decimal digit (0-9).
func containsNumber(out string) bool {
	firstDigit := strings.IndexAny(out, "1234567890")
	return firstDigit >= 0
}

// containsDot reports whether out holds at least one '.' character.
func containsDot(out string) bool {
	return strings.IndexByte(out, '.') >= 0
}

// spaceRegex matches one or more consecutive space characters, where the non-breaking space
// (\xa0, decimal 160) also counts as a space character.
var spaceRegex = regexp.MustCompile(`[\s\xa0]+`)

func findName(versionResources map[string]string) string {
for _, key := range []string{"FileDescription", "ProductName"} {
if name, ok := versionResources[key]; ok {
if strings.TrimSpace(name) == "" {
continue
}
trimmed := strings.TrimSpace(name)
return regexp.MustCompile(`[^a-zA-Z0-9.]+`).ReplaceAllString(trimmed, "")
// PE files found in the wild _not_ authored by Microsoft seem to use ProductName as a clear
// identifier of the software
nameFields := []string{"ProductName", "FileDescription", "InternalName", "OriginalFilename"}

if isMicrosoft(versionResources) {
// Microsoft seems to be consistent using the FileDescription, with a few that are blank and have
// fallbacks to ProductName last, as this is often something very broad like "Microsoft Windows"
nameFields = []string{"FileDescription", "InternalName", "OriginalFilename", "ProductName"}
}

for _, field := range nameFields {
value := spaceNormalize(versionResources[field])
if value == "" {
continue
}
return value
}

return ""
}

// controlCharRegex matches the ASCII control characters 0x00-0x1f. It is compiled once at package
// scope rather than on every spaceNormalize call.
var controlCharRegex = regexp.MustCompile(`[\x00-\x1f]`)

// spaceNormalize normalizes a string to a trimmed version with all contiguous whitespace collapsed
// to a single space character. Invalid UTF-8 byte sequences and ASCII control characters
// (0x00-0x1f) that are not matched as whitespace are removed. An input that is empty or only
// whitespace yields "".
func spaceNormalize(value string) string {
	value = strings.TrimSpace(value)
	if value == "" {
		return ""
	}
	// ensure valid utf8 text
	value = strings.ToValidUTF8(value, "")
	// consolidate all space characters
	value = spaceRegex.ReplaceAllString(value, " ")
	// remove other non-space, non-printable characters
	value = controlCharRegex.ReplaceAllString(value, "")
	// consolidate again: removing a control character can leave two spaces next to each other
	value = spaceRegex.ReplaceAllString(value, " ")
	// finally, remove any remaining surrounding whitespace
	return strings.TrimSpace(value)
}

// isMicrosoft reports whether either the "CompanyName" or the "ProductName" version resource
// contains "microsoft", compared case-insensitively.
func isMicrosoft(versionResources map[string]string) bool {
	for _, field := range []string{"CompanyName", "ProductName"} {
		lowered := strings.ToLower(versionResources[field])
		if strings.Contains(lowered, "microsoft") {
			return true
		}
	}
	return false
}
Loading
Loading