From 78da3b6d0b95d68a8cc93c0eea161777900160f2 Mon Sep 17 00:00:00 2001 From: Jonathan Pallant Date: Thu, 4 Jul 2024 18:34:35 +0100 Subject: [PATCH 1/2] generate-copyright now scans for cargo dependencies. --- Cargo.lock | 1 + src/bootstrap/src/core/build_steps/run.rs | 1 + src/tools/generate-copyright/Cargo.toml | 2 + .../generate-copyright/src/cargo_metadata.rs | 104 ++++++++++++++++++ src/tools/generate-copyright/src/main.rs | 54 ++++++++- 5 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 src/tools/generate-copyright/src/cargo_metadata.rs diff --git a/Cargo.lock b/Cargo.lock index 96cef9070842e..2b30987107b36 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1552,6 +1552,7 @@ dependencies = [ "anyhow", "serde", "serde_json", + "thiserror", ] [[package]] diff --git a/src/bootstrap/src/core/build_steps/run.rs b/src/bootstrap/src/core/build_steps/run.rs index 22d5efa5d95dd..630d6338f39ef 100644 --- a/src/bootstrap/src/core/build_steps/run.rs +++ b/src/bootstrap/src/core/build_steps/run.rs @@ -218,6 +218,7 @@ impl Step for GenerateCopyright { let mut cmd = builder.tool_cmd(Tool::GenerateCopyright); cmd.env("LICENSE_METADATA", &license_metadata); cmd.env("DEST", &dest); + cmd.env("CARGO", &builder.initial_cargo); builder.run(cmd); dest diff --git a/src/tools/generate-copyright/Cargo.toml b/src/tools/generate-copyright/Cargo.toml index 899ef0f8a6c26..ac97ad084ea06 100644 --- a/src/tools/generate-copyright/Cargo.toml +++ b/src/tools/generate-copyright/Cargo.toml @@ -2,6 +2,7 @@ name = "generate-copyright" version = "0.1.0" edition = "2021" +description = "Produces a manifest of all the copyrighted materials in the Rust Toolchain" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -9,3 +10,4 @@ edition = "2021" anyhow = "1.0.65" serde = { version = "1.0.147", features = ["derive"] } serde_json = "1.0.85" +thiserror = "1" diff --git a/src/tools/generate-copyright/src/cargo_metadata.rs 
b/src/tools/generate-copyright/src/cargo_metadata.rs
new file mode 100644
index 0000000000000..d610ddbafb99a
--- /dev/null
+++ b/src/tools/generate-copyright/src/cargo_metadata.rs
@@ -0,0 +1,104 @@
+//! Gets metadata about a workspace from Cargo
+
+/// Describes how this module can fail
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    #[error("Failed to run cargo metadata: {0:?}")]
+    Launching(#[from] std::io::Error),
+    #[error("Failed to get output from cargo metadata: {0}")]
+    GettingMetadata(String),
+    #[error("Failed to parse JSON output from cargo metadata: {0:?}")]
+    ParsingJson(#[from] serde_json::Error),
+    #[error("Failed to find expected JSON element {0} in output from cargo metadata")]
+    MissingJsonElement(&'static str),
+    #[error("Failed to find expected JSON element {0} in output from cargo metadata for package {1}")]
+    MissingJsonElementForPackage(String, String),
+}
+
+/// Describes one of our dependencies
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Dependency {
+    /// The name of the package
+    pub name: String,
+    /// The version number
+    pub version: String,
+    /// The license it is under
+    pub license: String,
+    /// The list of authors from the package metadata
+    pub authors: Vec<String>,
+}
+
+/// Use `cargo` to get a list of dependencies and their license data
+///
+/// Any dependency with a path beginning with `root_path` is ignored, as we
+/// assume `reuse` has covered it already.
+pub fn get(
+    cargo: &std::path::Path,
+    manifest_path: &std::path::Path,
+    root_path: &std::path::Path,
+) -> Result<Vec<Dependency>, Error> {
+    if manifest_path.file_name() != Some(std::ffi::OsStr::new("Cargo.toml")) {
+        panic!("cargo_metadata::get requires a path to a Cargo.toml file");
+    }
+    let metadata_output = std::process::Command::new(cargo)
+        .arg("metadata")
+        .arg("--format-version=1")
+        .arg("--all-features")
+        .arg("--manifest-path")
+        .arg(manifest_path)
+        .env("RUSTC_BOOTSTRAP", "1")
+        .output()
+        .map_err(Error::Launching)?;
+    if !metadata_output.status.success() {
+        return Err(Error::GettingMetadata(
+            String::from_utf8_lossy(&metadata_output.stderr).into_owned(),
+        ));
+    }
+    let metadata_json: serde_json::Value = serde_json::from_slice(&metadata_output.stdout)?;
+    let packages = metadata_json["packages"]
+        .as_array()
+        .ok_or_else(|| Error::MissingJsonElement("packages array"))?;
+    let mut v = Vec::new();
+    for package in packages {
+        let package =
+            package.as_object().ok_or_else(|| Error::MissingJsonElement("package object"))?;
+        // cargo reports `manifest_path` as an absolute path, so `root_path` must be absolute too
+        let manifest_path = package
+            .get("manifest_path")
+            .and_then(|v| v.as_str())
+            .map(std::path::Path::new)
+            .ok_or_else(|| Error::MissingJsonElement("package.manifest_path"))?;
+        if manifest_path.starts_with(root_path) {
+            // it's an in-tree dependency and reuse covers it
+            continue;
+        }
+        // otherwise it's an out-of-tree dependency
+        let get_string = |field_name: &str, package_name: &str| {
+            package.get(field_name).and_then(|v| v.as_str()).ok_or_else(|| {
+                Error::MissingJsonElementForPackage(
+                    format!("package.{field_name}"),
+                    package_name.to_owned(),
+                )
+            })
+        };
+
+        let name = get_string("name", "unknown")?;
+        let license = get_string("license", name)?;
+        let version = get_string("version", name)?;
+        let authors_list = package
+            .get("authors")
+            .and_then(|v| v.as_array())
+            .ok_or_else(|| Error::MissingJsonElement("package.authors"))?;
+        let authors: Vec<String> =
+            authors_list.iter().filter_map(|v| v.as_str()).map(|s| s.to_owned()).collect();
+
+        v.push(Dependency {
+            name: name.to_owned(),
+            version: version.to_owned(),
+            license: license.to_owned(),
+            authors,
+        })
+    }
+
+    Ok(v)
+}
diff --git a/src/tools/generate-copyright/src/main.rs b/src/tools/generate-copyright/src/main.rs
index 558e87290b0d8..521a9cfc86219 100644
--- a/src/tools/generate-copyright/src/main.rs
+++ b/src/tools/generate-copyright/src/main.rs
@@ -1,22 +1,56 @@
 use anyhow::Error;
 use std::collections::BTreeSet;
 use std::io::Write;
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};
 
+mod cargo_metadata;
+
+/// The entry point to the binary.
+///
+/// You should probably let `bootstrap` execute this program instead of running it directly.
+///
+/// Run `x.py run generate-copyright`
 fn main() -> Result<(), Error> {
     let dest = env_path("DEST")?;
+    let cargo = env_path("CARGO")?;
     let license_metadata = env_path("LICENSE_METADATA")?;
 
     let metadata: Metadata = serde_json::from_slice(&std::fs::read(&license_metadata)?)?;
 
+    let mut deps_set = BTreeSet::new();
+
+    let root_path = std::path::absolute(".")?;
+    for dep in cargo_metadata::get(&cargo, Path::new("./Cargo.toml"), &root_path)? {
+        deps_set.insert(dep);
+    }
+    for dep in cargo_metadata::get(&cargo, Path::new("./src/tools/cargo/Cargo.toml"), &root_path)? {
+        deps_set.insert(dep);
+    }
+    for dep in cargo_metadata::get(&cargo, Path::new("./library/std/Cargo.toml"), &root_path)? {
+        deps_set.insert(dep);
+    }
+
     let mut buffer = Vec::new();
+
+    write!(
+        buffer,
+        "# In-tree files\n\nThe following licenses cover the in-tree source files that were used in this release:\n\n"
+    )?;
     render_recursive(&metadata.files, &mut buffer, 0)?;
 
+    write!(
+        buffer,
+        "\n# Out-of-tree files\n\nThe following licenses cover the out-of-tree crates that were used in this release:\n\n"
+    )?;
+    render_deps(deps_set.iter(), &mut buffer)?;
+
     std::fs::write(&dest, &buffer)?;
 
     Ok(())
 }
 
+/// Recursively draw the tree of files/folders we found on disk and their licenses, as
+/// markdown, into the given Vec.
 fn render_recursive(node: &Node, buffer: &mut Vec<u8>, depth: usize) -> Result<(), Error> {
     let prefix = std::iter::repeat("> ").take(depth + 1).collect::<String>();
 
@@ -56,6 +90,7 @@ fn render_recursive(node: &Node, buffer: &mut Vec<u8>, depth: usize) -> Result<(
     Ok(())
 }
 
+/// Draw a series of sibling files/folders, as markdown, into the given Vec.
 fn render_license<'a>(
     prefix: &str,
     names: impl Iterator<Item = &'a String>,
@@ -85,6 +120,23 @@ fn render_license<'a>(
     Ok(())
 }
 
+/// Render a list of out-of-tree dependencies as markdown into the given Vec.
+fn render_deps<'a>(
+    deps: impl Iterator<Item = &'a cargo_metadata::Dependency>,
+    buffer: &mut Vec<u8>,
+) -> Result<(), Error> {
+    for dep in deps {
+        let authors_list = dep.authors.join(", ");
+        let url = format!("https://crates.io/crates/{}/{}", dep.name, dep.version);
+        writeln!(
+            buffer,
+            "* [{} {}]({}) ({}), by {}",
+            dep.name, dep.version, url, dep.license, authors_list
+        )?;
+    }
+    Ok(())
+}
+
 #[derive(serde::Deserialize)]
 struct Metadata {
     files: Node,

From ad8a989b76fa157eb23d079a16b80946b7126e08 Mon Sep 17 00:00:00 2001
From: Jonathan Pallant
Date: Thu, 4 Jul 2024 18:35:06 +0100
Subject: [PATCH 2/2] Add extra descriptive comments to collect-license-metadata.
--- src/tools/collect-license-metadata/Cargo.toml | 2 ++ src/tools/collect-license-metadata/src/main.rs | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/src/tools/collect-license-metadata/Cargo.toml b/src/tools/collect-license-metadata/Cargo.toml index d0820cfc2a0e4..edf9e5c5393ea 100644 --- a/src/tools/collect-license-metadata/Cargo.toml +++ b/src/tools/collect-license-metadata/Cargo.toml @@ -2,6 +2,8 @@ name = "collect-license-metadata" version = "0.1.0" edition = "2021" +description = "Runs the reuse tool and caches the output, so rust toolchain devs don't need to have reuse installed" +license = "MIT OR Apache-2.0" [dependencies] anyhow = "1.0.65" diff --git a/src/tools/collect-license-metadata/src/main.rs b/src/tools/collect-license-metadata/src/main.rs index cbe94af3510aa..a074c0c4fea11 100644 --- a/src/tools/collect-license-metadata/src/main.rs +++ b/src/tools/collect-license-metadata/src/main.rs @@ -16,6 +16,11 @@ use std::path::PathBuf; const CONDENSED_DIRECTORIES: &[(&str, &str)] = &[("./src/llvm-project/", "./src/llvm-project/README.md")]; +/// The entry point to the binary. +/// +/// You should probably let `bootstrap` execute this program instead of running it directly. +/// +/// Run `x.py run collect-license-metadata` fn main() -> Result<(), Error> { let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into(); let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into();