diff --git a/CHANGELOG.md b/CHANGELOG.md index 84571f911..c7c556744 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Fixed - Improve parsing of non-UTF-8 encoded pom.xml files +- `SPDX` parsing adding the described package as a dependency +- `SPDX` parsing certain text files with optional package fields ## 6.2.0 - 2024-03-19 @@ -57,7 +59,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Changed - Renamed multiple CLI arguments to avoid the term `lockfile` in places where - manifests are also accepted + manifests are also accepted - Renamed `lockfiles` key in `phylum status --json` output to `dependency_files` ## 5.9.0 - 2023-12-05 @@ -99,7 +101,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Changed - Automatic manifest resolution with `init`, `parse`, `analyze`, and `status` - will no longer return manifests in subdirectories of other manifests + will no longer return manifests in subdirectories of other manifests ### Fixed @@ -339,7 +341,7 @@ before, the existing project ID will be re-linked. ### Fixed - Fix parser lockfile consistency by @cd-work (#882) -- Add deno.window lib reference to extension\_api.ts by @kylewillmon (#890) +- Add deno.window lib reference to extension_api.ts by @kylewillmon (#890) ## 4.1.0 - 2022-12-20 @@ -375,7 +377,7 @@ before, the existing project ID will be re-linked. 
- `phylum auth set-token` by @kylewillmon (#786) - Add `--lockfile-type` option to `phylum analyze` by @cd-work (#798) - Add `phylum init` subcommand by @cd-work (#801) -- Add lockfile path and type to .phylum\_project by @cd-work (#806) +- Add lockfile path and type to .phylum_project by @cd-work (#806) - Add `unsandboxed_run` manifest permission by @cd-work (#777) - Add group member management subcommands by @cd-work (#809) @@ -492,7 +494,7 @@ before, the existing project ID will be re-linked. ### Fixed -- Fix PHYLUM\_API\_KEY overwriting config token by @cd-work in #631 +- Fix PHYLUM_API_KEY overwriting config token by @cd-work in #631 - Fix parsing gradle lockfile without classpath by @cd-work in #627 - Fix link dependencies in yarn parser by @cd-work in #621 @@ -565,7 +567,7 @@ before, the existing project ID will be re-linked. - Ignore certs everywhere when requested by @kylewillmon (#389) - Remove Web UI link from analyze output by @cd-work (#397) - Don't use streaming parsers by @kylewillmon (#401) -- Bump phylum\_types version by @kylewillmon (#409) +- Bump phylum_types version by @kylewillmon (#409) ## 3.4.0 - 2022-05-19 @@ -589,7 +591,7 @@ before, the existing project ID will be re-linked. ### Fixed - Fix non-frozen Pipfile suffix by @cd-work (#366) -- Use new endpoint for ping by @kylewillmon (#369) +- Use new endpoint for ping by @kylewillmon (#369) ## 3.2.0 - 2022-05-06 @@ -665,7 +667,7 @@ before, the existing project ID will be re-linked. - Continue install/upgrade even if quarantine flag isn't found by @kylewillmon (#249) - Replace Language/Type with Ecosystem by @cd-work (#248) -- Use git\_version for version numbers by @kylewillmon (#243) +- Use git_version for version numbers by @kylewillmon (#243) - Use Ecosystem in `phylum package` output by @cd-work (#255) - Add support for new npm package-lock format by @cd-work (#242) @@ -696,7 +698,7 @@ before, the existing project ID will be re-linked. 
- Bring Oauth Support to CLI by @DanielJoyce (#118) - Better error handling by @DanielJoyce (#145) -- Swap out static\_init module for lazy\_static by @DanielJoyce (#146) +- Swap out static_init module for lazy_static by @DanielJoyce (#146) - Gather files from static builder by @louislang (#147) - Adding release script by @eeclfrei (#150) - Updates for recent api changes by @eeclfrei (#160) diff --git a/lockfile/src/parsers/spdx.rs b/lockfile/src/parsers/spdx.rs index 5bb7a7e5c..bee4d1a72 100644 --- a/lockfile/src/parsers/spdx.rs +++ b/lockfile/src/parsers/spdx.rs @@ -1,18 +1,78 @@ use nom::branch::alt; -use nom::bytes::complete::{tag, take_until, take_while}; +use nom::bytes::complete::{tag, take_till, take_until, take_while}; use nom::character::complete::{line_ending, multispace0, not_line_ending, space0}; use nom::combinator::{eof, map_opt, opt, recognize}; use nom::error::{context, VerboseError, VerboseErrorKind}; -use nom::multi::{many1, many_till}; -use nom::sequence::{delimited, tuple}; +use nom::multi::{many0, many1, many_till}; +use nom::sequence::{delimited, preceded, tuple}; use nom::Err as NomErr; use crate::parsers::{take_till_blank_line, take_till_line_end, IResult}; -use crate::spdx::{ExternalRefs, PackageInformation, ReferenceCategory}; +use crate::spdx::{ExternalRefs, PackageInformation, ReferenceCategory, Relationship, SpdxInfo}; -pub(crate) fn parse(input: &str) -> IResult<&str, Vec> { - let (i, pkgs_info) = many1(package)(input)?; - Ok((i, pkgs_info)) +pub(crate) fn parse(input: &str) -> IResult<&str, SpdxInfo> { + let (_, relationships) = parse_relationships(input)?; + let (_, document_describes) = parse_document_describes(input)?; + let (i, spdx_id) = parse_spdx_id(input)?; + let (i, packages) = many1(package)(i)?; + + Ok((i, SpdxInfo { spdx_id: spdx_id.into(), document_describes, packages, relationships })) +} + +fn parse_spdx_id(input: &str) -> IResult<&str, &str> { + let (i, _) = skip_until_tag(input, "SPDXID:")?; + let (i, spdx_id) = 
take_till_line_end(i)?; + Ok((i, spdx_id.trim())) +} + +fn parse_document_describes(input: &str) -> IResult<&str, Vec> { + let (i, describes) = opt(preceded( + take_until("DocumentDescribes:"), + take_till(|c| c == '\n' || c == '\r'), + ))(input)?; + + let describes_list = if let Some(describes_str) = describes { + describes_str + .trim_start_matches("DocumentDescribes:") + .trim() + .split(',') + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(String::from) + .collect() + } else { + Vec::new() + }; + + Ok((i, describes_list)) +} + +fn skip_until_tag<'a>(input: &'a str, line_tag: &'a str) -> IResult<&'a str, ()> { + let (i, _) = take_until(line_tag)(input)?; + let (i, _) = tag(line_tag)(i)?; + Ok((i, ())) +} + +fn parse_relationships(input: &str) -> IResult<&str, Vec> { + many0(parse_relationship)(input) +} + +fn parse_relationship(input: &str) -> IResult<&str, Relationship> { + let (i, _) = skip_until_tag(input, "Relationship:")?; + let (i, rel) = recognize(ws(take_till_line_end))(i)?; + + let parts: Vec<&str> = rel.split_whitespace().collect(); + if parts.len() == 3 { + Ok((i, Relationship { + spdx_element_id: Some(parts[0].to_string()), + relationship_type: Some(parts[1].to_string()), + related_spdx_element: Some(parts[2].to_string()), + })) + } else { + let kind = VerboseErrorKind::Context("Invalid relationship format"); + let error = VerboseError { errors: vec![(input, kind)] }; + Err(NomErr::Failure(error)) + } } fn package_name(input: &str) -> IResult<&str, &str> { @@ -41,37 +101,37 @@ fn parse_package(input: &str) -> IResult<&str, PackageInformation> { fn package_info(input: &str) -> IResult<&str, PackageInformation> { let (i, _) = package_name(input)?; - // PackageName is required + // PackageName is required. let (i, _) = tag("PackageName:")(i)?; let (i, name) = recognize(ws(take_till_line_end))(i)?; - // SPDXID is required + // SPDXID is required. 
let (i, _) = tag("SPDXID:")(i)?; - let (tail, _) = recognize(ws(take_till_line_end))(i)?; + let (tail, spdx_id) = recognize(ws(take_till_line_end))(i)?; - // PackageVersion is optional + // PackageVersion is optional. // Version can be obtained from PURL if present, so we don't return an error - // here + // here. let (i, has_version) = opt(tag("PackageVersion:"))(tail)?; let (i, v) = recognize(ws(take_till_line_end))(i)?; let version = has_version.map(|_| v.trim().to_string()); - // Update input + // Update input. let i = match version { Some(_) => i, None => tail, }; - // PackageDownloadLocation is required - let (i, _) = tag("PackageDownloadLocation:")(i)?; + // PackageDownloadLocation is required. + let (i, _) = skip_until_tag(i, "PackageDownloadLocation:")?; let (i, download_location) = recognize(ws(take_till_line_end))(i)?; - // Look for external references + // Look for external references. let (i, next_input) = extern_ref(i)?; let (_, external_ref) = opt(recognize(ws(take_till_line_end)))(i)?; - // Package name + // Package name. let name = name.trim(); if let Some(external_ref) = external_ref { @@ -79,6 +139,7 @@ fn package_info(input: &str) -> IResult<&str, PackageInformation> { Ok((next_input, PackageInformation { name: name.into(), + spdx_id: spdx_id.trim().into(), version_info: version, download_location: download_location.into(), external_refs: vec![external_ref], diff --git a/lockfile/src/spdx.rs b/lockfile/src/spdx.rs index 40d791047..1c52c031f 100644 --- a/lockfile/src/spdx.rs +++ b/lockfile/src/spdx.rs @@ -18,16 +18,26 @@ use crate::{ #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] -struct SpdxInfo { - packages: Vec, +pub(crate) struct SpdxInfo { + #[serde(rename = "SPDXID")] + pub(crate) spdx_id: String, + // Deprecated in v2.3 but kept for v2.2 compatibility. 
+ #[serde(default)] + pub(crate) document_describes: Vec, + pub(crate) packages: Vec, + #[serde(default)] + pub(crate) relationships: Vec, } #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] pub(crate) struct PackageInformation { pub(crate) name: String, + #[serde(rename = "SPDXID")] + pub(crate) spdx_id: String, pub(crate) version_info: Option, pub(crate) download_location: String, + #[serde(default)] pub(crate) external_refs: Vec, } @@ -54,6 +64,14 @@ pub(crate) enum ReferenceCategory { Unknown, } +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +pub(crate) struct Relationship { + pub(crate) spdx_element_id: Option, + pub(crate) related_spdx_element: Option, + pub(crate) relationship_type: Option, +} + fn type_from_url(url: &str) -> anyhow::Result { if url.starts_with("https://registry.npmjs.org") | url.starts_with("https://registry.yarnpkg.com") @@ -105,7 +123,7 @@ fn from_locator(registry: &str, locator: &str) -> anyhow::Result { PackageType::Nuget => locator.rsplit_once('/'), PackageType::Maven => locator.rsplit_once(':').filter(|(name, _)| name.contains(':')), _ => { - // Not in the spec, but included for compatibility with our API + // Not in the spec, but included for compatibility with our API. locator.rsplit_once('@') }, } @@ -145,16 +163,35 @@ pub struct Spdx; impl Parse for Spdx { fn parse(&self, data: &str) -> anyhow::Result> { - let packages_info = if let Ok(lock) = serde_json::from_str::(data) { - serde_json::from_value::(lock)?.packages + let spdx_info = if let Ok(lock) = serde_json::from_str::(data) { + serde_json::from_value::(lock)? } else if let Ok(lock) = serde_yaml::from_str::(data) { - serde_yaml::from_value::(lock)?.packages + serde_yaml::from_value::(lock)? 
} else { spdx::parse(data).finish().map_err(|e| anyhow!(convert_error(data, e)))?.1 }; + let spdx_ids: Vec<_> = spdx_info + .relationships + .into_iter() + .filter_map(|r| { + if r.relationship_type.as_ref().map_or(false, |t| t == "DESCRIBES") + && r.spdx_element_id.as_ref().map_or(false, |t| t == &spdx_info.spdx_id) + { + r.related_spdx_element + } else { + None + } + }) + .collect(); + let mut packages = Vec::new(); - for package_info in packages_info { + for package_info in spdx_info.packages { + if spdx_info.document_describes.contains(&package_info.spdx_id) + || spdx_ids.contains(&package_info.spdx_id) + { + continue; + } match Package::try_from(&package_info) { Ok(pkg) => packages.push(pkg), Err(e) => { @@ -470,6 +507,98 @@ mod tests { assert!(pkgs.is_empty()) } + #[test] + fn removes_self_identified_package() { + let data = r##"SPDXVersion: SPDX-2.2 + DataLicense: CC0-1.0 + SPDXID: SPDXRef-DOCUMENT + DocumentName: Python-cve-bin-tool + DocumentNamespace: http://spdx.org/spdxdocs/Python-cve-bin-tool-4137f958-709e-4f44-940e-f477ded25cbd + LicenseListVersion: 3.22 + Creator: Tool: sbom4python-0.10.4 + Created: 2024-04-01T00:28:13Z + CreatorComment: This document has been automatically generated. 
+ DocumentDescribes: SPDXRef-Package1, SPDXRef-Package2 + ##### + + PackageName: cve-bin-tool + SPDXID: SPDXRef-Package-1-cve-bin-tool + PackageVersion: 3.3rc2 + PrimaryPackagePurpose: APPLICATION + PackageSupplier: Person: Terri Oda (terri.oda@intel.com) + PackageDownloadLocation: https://pypi.org/project/cve-bin-tool/3.3rc2 + FilesAnalyzed: false + PackageChecksum: SHA1: c491590aeea36235930d1c6b8480d2489a470ece + PackageLicenseDeclared: GPL-3.0-or-later + PackageLicenseConcluded: GPL-3.0-or-later + PackageCopyrightText: NOASSERTION + PackageSummary: CVE Binary Checker Tool + ExternalRef: PACKAGE_MANAGER purl pkg:pypi/cve-bin-tool@3.3rc2 + ExternalRef: SECURITY cpe23Type cpe:2.3:a:terri_oda:cve-bin-tool:3.3rc2:*:*:*:*:*:*:* + ##### + + PackageName: aiohttp + SPDXID: SPDXRef-Package-2-aiohttp + PackageVersion: 3.9.3 + PrimaryPackagePurpose: LIBRARY + PackageSupplier: NOASSERTION + PackageDownloadLocation: https://pypi.org/project/aiohttp/3.9.3 + FilesAnalyzed: false + PackageLicenseDeclared: NOASSERTION + PackageLicenseConcluded: Apache-2.0 + PackageLicenseComments: aiohttp declares Apache 2 which is not currently a valid SPDX License identifier or expression. 
+ PackageCopyrightText: NOASSERTION + PackageSummary: Async http client/server framework (asyncio) + ExternalRef: PACKAGE_MANAGER purl pkg:pypi/aiohttp@3.9.3 + ##### + + PackageName: @colors/colors + SPDXID: SPDXRef-Package1 + PackageVersion: 1.5.0 + PackageDownloadLocation: http://github.com/DABH/colors.js.git + PackageSourceInfo: acquired package info from installed node module manifest file: /usr/local/lib/node_modules/npm/node_modules/@colors/colors/package.json + PackageOriginator: Person: DABH + PackageLicenseDeclared: MIT + PackageLicenseConcluded: MIT + PackageCopyrightText: NOASSERTION + PackageHomePage: https://github.com/DABH/colors.js + ExternalRef: SECURITY cpe23Type cpe:2.3:a:\@colors\/colors:\@colors\/colors:1.5.0:*:*:*:*:*:*:* + ExternalRef: SECURITY cpe23Type cpe:2.3:a:DABH:\@colors\/colors:1.5.0:*:*:*:*:*:*:* + ExternalRef: SECURITY cpe23Type cpe:2.3:a:dabh:\@colors\/colors:1.5.0:*:*:*:*:*:*:* + ExternalRef: PACKAGE-MANAGER purl pkg:npm/%40colors/colors@1.5.0 + + PackageName: @discoveryjs/json-ext + SPDXID: SPDXRef-Package2 + PackageVersion: 0.5.6 + PackageDownloadLocation: NOASSERTION + PackageSourceInfo: acquired package info from installed node module manifest file: /usr/local/go/src/cmd/vendor/github.com/google/pprof/third_party/d3flamegraph/package-lock.json + PackageLicenseDeclared: NONE + PackageLicenseConcluded: NONE + PackageCopyrightText: NOASSERTION + ExternalRef: SECURITY cpe23Type cpe:2.3:a:\@discoveryjs\/json-ext:\@discoveryjs\/json-ext:0.5.6:*:*:*:*:*:*:* + ExternalRef: SECURITY cpe23Type cpe:2.3:a:\@discoveryjs\/json-ext:\@discoveryjs\/json_ext:0.5.6:*:*:*:*:*:*:* + ExternalRef: SECURITY cpe23Type cpe:2.3:a:\@discoveryjs\/json_ext:\@discoveryjs\/json-ext:0.5.6:*:*:*:*:*:*:* + ExternalRef: SECURITY cpe23Type cpe:2.3:a:\@discoveryjs\/json_ext:\@discoveryjs\/json_ext:0.5.6:*:*:*:*:*:*:* + ExternalRef: SECURITY cpe23Type cpe:2.3:a:\@discoveryjs\/json:\@discoveryjs\/json-ext:0.5.6:*:*:*:*:*:*:* + ExternalRef: SECURITY cpe23Type 
cpe:2.3:a:\@discoveryjs\/json:\@discoveryjs\/json_ext:0.5.6:*:*:*:*:*:*:* + ExternalRef: PACKAGE-MANAGER purl pkg:npm/%40discoveryjs/json-ext@0.5.6 + + Relationship: SPDXRef-DOCUMENT DESCRIBES SPDXRef-Package-1-cve-bin-tool + Relationship: SPDXRef-Package-1-cve-bin-tool DEPENDS_ON SPDXRef-Package-2-aiohttp + "##; + + let pkgs = Spdx.parse(data).unwrap(); + assert_eq!(pkgs.len(), 1); + + let expected_pkgs = Package { + name: "aiohttp".into(), + version: PackageVersion::FirstParty("3.9.3".into()), + package_type: PackageType::PyPi, + }; + + assert_eq!(expected_pkgs, pkgs[0]); + } + #[test] fn parse_spdx_2_2_tag_value() { let pkgs = Spdx.parse(include_str!("../../tests/fixtures/spdx-2.2.spdx")).unwrap(); @@ -591,7 +720,7 @@ mod tests { fn test_file_type() { let parse_results = Spdx.parse(include_str!("../../tests/fixtures/appbomination.spdx.json")); - let expected = anyhow!("missing field `externalRefs`").to_string(); + let expected = anyhow!("Missing package locator for Gradle").to_string(); let actual = parse_results.err().unwrap().to_string(); assert_eq!(actual, expected)