From 79a0d76483192e5ddd849a4c8f90e79f464a9879 Mon Sep 17 00:00:00 2001 From: rzadp Date: Mon, 9 Sep 2024 17:14:15 +0200 Subject: [PATCH 1/8] ensure product --- README.md | 22 ++++++++++++++++++++++ license-scanner/cli/scan.ts | 2 ++ license-scanner/license.ts | 32 ++++++++++++++++++++++++++++++-- license-scanner/main.ts | 7 +++++++ license-scanner/scanner.ts | 5 ++++- license-scanner/types.ts | 10 +++++++++- 6 files changed, 74 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index af2c281..7a3d6d0 100644 --- a/README.md +++ b/README.md @@ -276,6 +276,28 @@ yarn start -- scan --ensure-any-license /directory/or/file Those options are conflicting with each other so only one should be specified. By default, no licensing is enforced. +## `--ensure-product` + +If configured, the scan will make sure that if a license header references a product, +it will be the correct product and not a result of a copy-paste error. + +For example, this fragment references the `Substrate` product. + +```text +// Substrate is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +``` + +Examples: + +```bash +yarn start -- scan --ensure-product Polkadot -- /directory/or/file +``` + +It treats a different product reference as an error, but it allows a generic "this program". + ## `--exclude` Can be used to exclude files or directories from the scan. 
diff --git a/license-scanner/cli/scan.ts b/license-scanner/cli/scan.ts index 7998716..f0d0a14 100644 --- a/license-scanner/cli/scan.ts +++ b/license-scanner/cli/scan.ts @@ -86,6 +86,7 @@ export const executeScan = async function ({ detectionOverrides, logLevel, ensureLicenses, + ensureProduct, exclude, }: ScanCliArgs) { const licenses = await loadLicensesNormalized(joinPath(buildRoot, "licenses"), { @@ -119,6 +120,7 @@ export const executeScan = async function ({ detectionOverrides: detectionOverrides ?? null, logger, ensureLicenses, + ensureProduct, }); allLicensingErrors.push(...licensingErrors); } diff --git a/license-scanner/license.ts b/license-scanner/license.ts index bc15553..0d9093b 100644 --- a/license-scanner/license.ts +++ b/license-scanner/license.ts @@ -2,7 +2,7 @@ import assert from "assert"; import fs from "fs"; import { promisify } from "util"; -import { EnsureLicensesInResultOptions, License, LicenseInput } from "./types"; +import { EnsureLicensesInResultOptions, LicenceMatcher, License, LicenseInput } from "./types"; import { isBinaryFile, loadFiles } from "./utils"; const openAsync = promisify(fs.open); @@ -79,7 +79,7 @@ const spdxLicenseIdentifierMatcher = new RegExp(`${spdxLicenseIdentifierPrefix}[ const triggerAccumulationRegExp = copyrightTailRegExp.concat(spdxLicenseIdentifierTailRegExp); -export const getLicenseMatcher = function (licenses: License[], startLinesExcludes?: string[]) { +export const getLicenseMatcher = function (licenses: License[], startLinesExcludes?: string[]): LicenceMatcher { const bufSize = Math.max( 4096, ...licenses.map(({ text, match }) => { @@ -299,6 +299,34 @@ export const ensureLicensesInResult = function ({ } }; +/** + * If a product is mentioned in this file, + * ensure that it is the correct product, + * and not a copy-paste error from a different product. 
+ */ +export const ensureProductInFile = function (filePath: string, product: string | undefined): Error | undefined { + if (!product) return; + const lines = fs.readFileSync(filePath, "utf8").split(/\r?\n/); + for (const regexp of [ + new RegExp("This file is part of (.*)\\."), + new RegExp("// (.*) is free software"), + new RegExp("// (.*) is distributed in the hope"), + new RegExp("// along with (.+?)\\.(.*)gnu.org"), + ]) { + for (const line of lines) { + if (regexp.test(line)) { + const matches = regexp.exec(line); + assert(matches); + if (matches[1] !== product && matches[1].toLowerCase() !== "this program") { + return new Error( + `Product mismatch in ${filePath}. Expected "${product}", detected "${matches[1]}" in line: "${line}".`, + ); + } + } + } + } +}; + export const throwLicensingErrors = function (licensingErrors: Error[]) { if (licensingErrors.length === 0) return; throw new Error( diff --git a/license-scanner/main.ts b/license-scanner/main.ts index a5fe90b..cdedc88 100755 --- a/license-scanner/main.ts +++ b/license-scanner/main.ts @@ -47,6 +47,12 @@ program "If configured, the scan will make sure that all scanned files are licensed with any license.", ).conflicts("ensureLicenses"), ) + .addOption( + new Option( + "--ensure-product <product>", + "If configured, the scan will make sure the product mentioned in the license headers is correct.", + ), + ) .option("--exclude ", "Can be used to exclude files or directories from the scan.") // It's actually correct usage but @commander-js/extra-typings is wrong on this one. // eslint-disable-next-line @typescript-eslint/no-misused-promises @@ -60,6 +66,7 @@ program exclude: options.exclude ??
[], logLevel: options.logLevel as LogLevel, ensureLicenses: readEnsureLicenses(options), + ensureProduct: options.ensureProduct, }); } catch (e: any) { logger.debug(e.stack); diff --git a/license-scanner/scanner.ts b/license-scanner/scanner.ts index cbdb706..afd5015 100644 --- a/license-scanner/scanner.ts +++ b/license-scanner/scanner.ts @@ -2,7 +2,7 @@ import assert from "assert"; import { dirname, join as joinPath, relative as relativePath } from "path"; import { getOrDownloadCrate, getVersionedCrateName } from "./crate"; -import { ensureLicensesInResult } from "./license"; +import { ensureLicensesInResult, ensureProductInFile } from "./license"; import { getOrDownloadRepository } from "./repository"; import { scanQueue, scanQueueSize } from "./synchronization"; import { @@ -133,6 +133,7 @@ export const scan = async function (options: ScanOptions): Promise { tracker, logger, ensureLicenses = false, + ensureProduct, } = options; const licensingErrors: Error[] = []; @@ -179,6 +180,8 @@ export const scan = async function (options: ScanOptions): Promise { const result = await matchLicense(file.path); const licensingError = ensureLicensesInResult({ file, result, ensureLicenses }); if (licensingError) licensingErrors.push(licensingError); + const productError = ensureProductInFile(file.path, ensureProduct); + if (productError) licensingErrors.push(productError); if (result === undefined) { return; } diff --git a/license-scanner/types.ts b/license-scanner/types.ts index 0ff1c0e..bf275d4 100644 --- a/license-scanner/types.ts +++ b/license-scanner/types.ts @@ -55,6 +55,8 @@ export type ScanResult = { licensingErrors: Error[]; }; +export type LicenceMatcher = (file: string) => Promise; + export type ScanOptions = { saveResult: (projectId: string, filePathFromRoot: string, result: ScanResultItem) => Promise; root: string; @@ -64,7 +66,7 @@ export type ScanOptions = { repositories: string; crates: string; }; - matchLicense: (file: string) => Promise; + matchLicense: 
LicenceMatcher; rust: ScanOptionsRust | null; transformItemKey?: (str: string) => string; tracker: ScanTracker; @@ -77,6 +79,11 @@ export type ScanOptions = { * all source files have one of those licenses detected. */ ensureLicenses?: boolean | string[]; + /** + * If set, the scan will make sure that any product + * named in the license headers matches this product name. + */ + ensureProduct?: string | undefined; }; export type LicenseInput = { @@ -150,6 +157,7 @@ export interface ScanCliArgs { detectionOverrides: DetectionOverride[]; logLevel: LogLevel; ensureLicenses: boolean | string[]; + ensureProduct: string | undefined; } export interface DumpCliArgs { From 8c4154dfaaf52f0c9c68edd2eacad100626aad08 Mon Sep 17 00:00:00 2001 From: rzadp Date: Tue, 10 Sep 2024 13:35:37 +0200 Subject: [PATCH 2/8] minimize diff --- license-scanner/license.ts | 4 ++-- license-scanner/types.ts | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/license-scanner/license.ts b/license-scanner/license.ts index 0d9093b..ebe8717 100644 --- a/license-scanner/license.ts +++ b/license-scanner/license.ts @@ -2,7 +2,7 @@ import assert from "assert"; import fs from "fs"; import { promisify } from "util"; -import { EnsureLicensesInResultOptions, LicenceMatcher, License, LicenseInput } from "./types"; +import { EnsureLicensesInResultOptions, License, LicenseInput } from "./types"; import { isBinaryFile, loadFiles } from "./utils"; const openAsync = promisify(fs.open); @@ -79,7 +79,7 @@ const spdxLicenseIdentifierMatcher = new RegExp(`${spdxLicenseIdentifierPrefix}[ const triggerAccumulationRegExp = copyrightTailRegExp.concat(spdxLicenseIdentifierTailRegExp); -export const getLicenseMatcher = function (licenses: License[], startLinesExcludes?: string[]): LicenceMatcher { +export const getLicenseMatcher = function (licenses: License[], startLinesExcludes?: string[]) { const bufSize = Math.max( 4096, ...licenses.map(({ text, match }) => { diff --git a/license-scanner/types.ts
b/license-scanner/types.ts index bf275d4..4746224 100644 --- a/license-scanner/types.ts +++ b/license-scanner/types.ts @@ -55,8 +55,6 @@ export type ScanResult = { licensingErrors: Error[]; }; -export type LicenceMatcher = (file: string) => Promise; - export type ScanOptions = { saveResult: (projectId: string, filePathFromRoot: string, result: ScanResultItem) => Promise; root: string; @@ -66,7 +64,7 @@ export type ScanOptions = { repositories: string; crates: string; }; - matchLicense: LicenceMatcher; + matchLicense: (file: string) => Promise; rust: ScanOptionsRust | null; transformItemKey?: (str: string) => string; tracker: ScanTracker; From e6033052abdf2829f43b7544c81a3f9cde654501 Mon Sep 17 00:00:00 2001 From: rzadp Date: Tue, 10 Sep 2024 13:54:05 +0200 Subject: [PATCH 3/8] test on ci --- .github/workflows/CI.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 651260c..36c29a5 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -74,3 +74,15 @@ jobs: grep -q 'No license detected in reconstruct.rs. Exact file path:' ./out.txt grep -q 'No license detected in mod.rs. Exact file path:' ./out.txt working-directory: polkadot + - name: Enforce product references in headers + run: | + license-scanner scan \ + --ensure-product 'Polkadot' \ + -- ./polkadot/xcm/src/lib.rs \ + 2>out.txt \ + && exit 1 || exit 0 + # We expected it to fail because there are some copy-paste errors. 
+ + grep -q 'Product mismatch' ./out.txt + grep -q 'Expected "Polkadot", detected "Substrate" in line:' ./out.txt + working-directory: polkadot From ed5c425139f2a086bf20eb36977d2cac8f8603c2 Mon Sep 17 00:00:00 2001 From: rzadp Date: Tue, 10 Sep 2024 14:21:44 +0200 Subject: [PATCH 4/8] fixup test --- .github/workflows/CI.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 36c29a5..39db00b 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -68,7 +68,7 @@ jobs: --exclude ./**/target ./**/weights \ -- ./**/src/**/*.rs \ 2>out.txt \ - && exit 1 || exit 0 + && exit 1 || true # We expected it to fail because there are some unlicensed files left. grep -q 'No license detected in reconstruct.rs. Exact file path:' ./out.txt @@ -80,7 +80,7 @@ jobs: --ensure-product 'Polkadot' \ -- ./polkadot/xcm/src/lib.rs \ 2>out.txt \ - && exit 1 || exit 0 + && exit 1 || true # We expected it to fail because there are some copy-paste errors. grep -q 'Product mismatch' ./out.txt From 25fa7887d52cadce82c37d0dced2e896e5f56fa9 Mon Sep 17 00:00:00 2001 From: rzadp Date: Tue, 10 Sep 2024 14:25:51 +0200 Subject: [PATCH 5/8] path fixup --- .github/workflows/CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 39db00b..dacb31f 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -78,7 +78,7 @@ jobs: run: | license-scanner scan \ --ensure-product 'Polkadot' \ - -- ./polkadot/xcm/src/lib.rs \ + -- ./xcm/src/lib.rs \ 2>out.txt \ && exit 1 || true # We expected it to fail because there are some copy-paste errors. 
From c81ca270077274f1ddf6cbbd17a3479a918db0c5 Mon Sep 17 00:00:00 2001 From: Przemek Rzad Date: Fri, 13 Sep 2024 10:31:07 +0200 Subject: [PATCH 6/8] Update .github/workflows/CI.yml Co-authored-by: Yuri Volkov <0@mcornholio.ru> --- .github/workflows/CI.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index dacb31f..af364f4 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -76,11 +76,11 @@ jobs: working-directory: polkadot - name: Enforce product references in headers run: | - license-scanner scan \ + set -e + ! license-scanner scan \ --ensure-product 'Polkadot' \ -- ./xcm/src/lib.rs \ - 2>out.txt \ - && exit 1 || true + 2>out.txt # We expected it to fail because there are some copy-paste errors. grep -q 'Product mismatch' ./out.txt From fea7952f66af919b73e9428bf12bf97ae4a50455 Mon Sep 17 00:00:00 2001 From: rzadp Date: Fri, 13 Sep 2024 10:39:53 +0200 Subject: [PATCH 7/8] no need for set -e --- .github/workflows/CI.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index af364f4..077e2a4 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -76,7 +76,6 @@ jobs: working-directory: polkadot - name: Enforce product references in headers run: | - set -e ! license-scanner scan \ --ensure-product 'Polkadot' \ -- ./xcm/src/lib.rs \ From 4ca8540c8e706b618ea737e16faaac374d611d1e Mon Sep 17 00:00:00 2001 From: rzadp Date: Fri, 13 Sep 2024 10:39:59 +0200 Subject: [PATCH 8/8] change the other line as well --- .github/workflows/CI.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 077e2a4..958b513 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -63,12 +63,11 @@ jobs: run: | shopt -s globstar - license-scanner scan \ + ! 
license-scanner scan \ --ensure-licenses Apache-2.0 GPL-3.0-only \ --exclude ./**/target ./**/weights \ -- ./**/src/**/*.rs \ - 2>out.txt \ - && exit 1 || true + 2>out.txt # We expected it to fail because there are some unlicensed files left. grep -q 'No license detected in reconstruct.rs. Exact file path:' ./out.txt