Skip to content

Commit

Permalink
Update generate-copyright
Browse files Browse the repository at this point in the history
This tool now scans for cargo dependencies and includes any important looking license files.

We do this because cargo package metadata is not sufficient - the Apache-2.0 license says you have to include any NOTICE file, for example. And authors != copyright holders (cargo has the former, we must include the latter).
  • Loading branch information
jonathanpallant committed Aug 6, 2024
1 parent 93ea767 commit ba0d6c9
Show file tree
Hide file tree
Showing 7 changed files with 320 additions and 12 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1408,6 +1408,8 @@ dependencies = [
"anyhow",
"serde",
"serde_json",
"tempfile",
"thiserror",
]

[[package]]
Expand Down
2 changes: 2 additions & 0 deletions src/bootstrap/src/core/build_steps/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ impl Step for GenerateCopyright {
let mut cmd = builder.tool_cmd(Tool::GenerateCopyright);
cmd.env("LICENSE_METADATA", &license_metadata);
cmd.env("DEST", &dest);
cmd.env("OUT_DIR", &builder.out);
cmd.env("CARGO", &builder.initial_cargo);
cmd.run(builder);

dest
Expand Down
2 changes: 2 additions & 0 deletions src/tools/collect-license-metadata/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
name = "collect-license-metadata"
version = "0.1.0"
edition = "2021"
description = "Runs the reuse tool and caches the output, so rust toolchain devs don't need to have reuse installed"
license = "MIT OR Apache-2.0"

[dependencies]
anyhow = "1.0.65"
Expand Down
5 changes: 5 additions & 0 deletions src/tools/collect-license-metadata/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ use anyhow::Error;

use crate::licenses::LicensesInterner;

/// The entry point to the binary.
///
/// You should probably let `bootstrap` execute this program instead of running it directly.
///
/// Run `x.py run collect-license-metadata`
fn main() -> Result<(), Error> {
let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into();
let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into();
Expand Down
3 changes: 3 additions & 0 deletions src/tools/generate-copyright/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
name = "generate-copyright"
version = "0.1.0"
edition = "2021"
description = "Produces a manifest of all the copyrighted materials in the Rust Toolchain"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0.65"
serde = { version = "1.0.147", features = ["derive"] }
serde_json = "1.0.85"
thiserror = "1"
tempfile = "3"
196 changes: 196 additions & 0 deletions src/tools/generate-copyright/src/cargo_metadata.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
//! Gets metadata about a workspace from Cargo

use std::collections::{BTreeMap, BTreeSet};
use std::ffi::{OsStr, OsString};
use std::path::Path;

/// Describes how this module can fail
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("Failed to run cargo metadata: {0:?}")]
LaunchingMetadata(#[from] std::io::Error),
#[error("Failed get output from cargo metadata: {0:?}")]
GettingMetadata(String),
#[error("Failed parse JSON output from cargo metadata: {0:?}")]
ParsingJson(#[from] serde_json::Error),
#[error("Failed find expected JSON element {0} in output from cargo metadata")]
MissingJsonElement(&'static str),
#[error("Failed find expected JSON element {0} in output from cargo metadata for package {1}")]
MissingJsonElementForPackage(String, String),
#[error("Failed to run cargo vendor: {0:?}")]
LaunchingVendor(std::io::Error),
#[error("Failed to complete cargo vendor")]
RunningVendor,
}

/// Describes one of our dependencies
///
/// The derived `PartialOrd`/`Ord` implementations compare the fields in
/// declaration order (`name` first, then `version`, and so on), which is the
/// order in which a `BTreeSet<Dependency>` iterates.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Dependency {
/// The name of the package
pub name: String,
/// The version number
pub version: String,
/// The license it is under
pub license: String,
/// The list of authors from the package metadata
pub authors: Vec<String>,
/// A list of important files from the package, with their contents.
///
/// The map key is the file name and the value is the file's text.
///
/// This includes *COPYRIGHT*, *NOTICE*, *AUTHOR*, *LICENSE*, and *LICENCE* files, case-insensitive.
pub notices: BTreeMap<OsString, String>,
}

/// Use `cargo` to get a list of dependencies and their license data.
///
/// This will involve running `cargo vendor` into `${BUILD}/vendor` so we can
/// grab the license files.
///
/// Any dependency with a path beginning with `root_path` is ignored, as we
/// assume `reuse` has covered it already.
pub fn get(
cargo: &Path,
dest: &Path,
root_path: &Path,
manifest_paths: &[&Path],
) -> Result<BTreeSet<Dependency>, Error> {
let mut temp_set = BTreeSet::new();
// Look at the metadata for each manifest
for manifest_path in manifest_paths {
if manifest_path.file_name() != Some(OsStr::new("Cargo.toml")) {
panic!("cargo_manifest::get requires a path to a Cargo.toml file");
}
let metadata_json = get_metadata_json(cargo, manifest_path)?;
let packages = metadata_json["packages"]
.as_array()
.ok_or_else(|| Error::MissingJsonElement("packages array"))?;
for package in packages {
let package =
package.as_object().ok_or_else(|| Error::MissingJsonElement("package object"))?;
let manifest_path = package
.get("manifest_path")
.and_then(|v| v.as_str())
.map(Path::new)
.ok_or_else(|| Error::MissingJsonElement("package.manifest_path"))?;
if manifest_path.starts_with(&root_path) {
// it's an in-tree dependency and reuse covers it
continue;
}
// otherwise it's an out-of-tree dependency
let get_string = |field_name: &str, package_name: &str| {
package.get(field_name).and_then(|v| v.as_str()).ok_or_else(|| {
Error::MissingJsonElementForPackage(
format!("package.{field_name}"),
package_name.to_owned(),
)
})
};
let name = get_string("name", "unknown")?;
let license = get_string("license", name)?;
let version = get_string("version", name)?;
let authors_list = package
.get("authors")
.and_then(|v| v.as_array())
.ok_or_else(|| Error::MissingJsonElement("package.authors"))?;
let authors: Vec<String> =
authors_list.iter().filter_map(|v| v.as_str()).map(|s| s.to_owned()).collect();
temp_set.insert(Dependency {
name: name.to_owned(),
version: version.to_owned(),
license: license.to_owned(),
authors,
notices: BTreeMap::new(),
});
}
}

// Now do a cargo-vendor and grab everything
let vendor_path = dest.join("vendor");
println!("Vendoring deps into {}...", vendor_path.display());
run_cargo_vendor(cargo, &vendor_path, manifest_paths)?;

// Now for each dependency we found, go and grab any important looking files
let mut output = BTreeSet::new();
for mut dep in temp_set {
load_important_files(&mut dep, &vendor_path)?;
output.insert(dep);
}

Ok(output)
}

/// Get cargo-metdata for a package, as JSON
fn get_metadata_json(cargo: &Path, manifest_path: &Path) -> Result<serde_json::Value, Error> {
let metadata_output = std::process::Command::new(cargo)
.arg("metadata")
.arg("--format-version=1")
.arg("--all-features")
.arg("--manifest-path")
.arg(manifest_path)
.env("RUSTC_BOOTSTRAP", "1")
.output()
.map_err(|e| Error::LaunchingMetadata(e))?;
if !metadata_output.status.success() {
return Err(Error::GettingMetadata(
String::from_utf8(metadata_output.stderr).expect("UTF-8 output from cargo"),
));
}
let json = serde_json::from_slice(&metadata_output.stdout)?;
Ok(json)
}

/// Run cargo-vendor, fetching into the given dir
fn run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Result<(), Error> {
let mut vendor_command = std::process::Command::new(cargo);
vendor_command.env("RUSTC_BOOTSTRAP", "1");
vendor_command.arg("vendor");
vendor_command.arg("--quiet");
vendor_command.arg("--versioned-dirs");
for manifest_path in manifest_paths {
vendor_command.arg("-s");
vendor_command.arg(manifest_path);
}
vendor_command.arg(dest);

let vendor_status = vendor_command.status().map_err(|e| Error::LaunchingVendor(e))?;

if !vendor_status.success() {
return Err(Error::RunningVendor);
}

Ok(())
}

/// Read the important-looking files of a vendored dependency into `dep.notices`.
///
/// The dependency is expected to live in `vendor_root/<name>-<version>`. Only
/// the top level of that directory is examined. An entry is kept when its
/// ASCII-lowercased file name contains one of `copyright`, `licence`,
/// `license`, `author` or `notice`, and it is a regular file. Directories
/// with a matching name are currently skipped.
///
/// Hopefully Cargo.toml will one day carry enough information to make this
/// manual scraping unnecessary.
fn load_important_files(dep: &mut Dependency, vendor_root: &Path) -> Result<(), Error> {
    const MARKERS: [&str; 5] = ["copyright", "licence", "license", "author", "notice"];

    let name_version = format!("{}-{}", dep.name, dep.version);
    println!("Scraping notices for {}...", name_version);
    let crate_dir = vendor_root.join(name_version);
    for dir_entry in std::fs::read_dir(crate_dir)? {
        let dir_entry = dir_entry?;
        let entry_metadata = dir_entry.metadata()?;
        let full_path = dir_entry.path();
        let Some(filename) = full_path.file_name() else {
            continue;
        };
        let lowered_os = filename.to_ascii_lowercase();
        let lowered = lowered_os.to_string_lossy();
        let looks_important = MARKERS.iter().any(|marker| lowered.contains(*marker));
        if looks_important && entry_metadata.is_file() {
            println!("Scraping {}", filename.to_string_lossy());
            dep.notices.insert(filename.to_owned(), std::fs::read_to_string(full_path)?);
        }
    }
    Ok(())
}
Loading

0 comments on commit ba0d6c9

Please sign in to comment.