From 948cc157abefafef6c9693ac29f5cca6deaa9129 Mon Sep 17 00:00:00 2001 From: Matt Donoughe Date: Fri, 7 Apr 2023 09:18:10 -0400 Subject: [PATCH] check in purl code (#1) --- .cargo/config.toml | 2 + .github/ISSUE_TEMPLATE/feature_request.md | 6 +- .github/PULL_REQUEST_TEMPLATE.md | 2 - .github/workflows/main.yml | 163 +-- .gitignore | 4 - CODE_OF_CONDUCT.md | 132 +++ Cargo.lock | 609 ++++++++++ Cargo.toml | 13 +- LICENSE | 21 + README.md | 15 +- SECURITY.md | 10 + purl/Cargo.toml | 32 + purl/README.md | 51 + purl/src/builder.rs | 417 +++++++ purl/src/format.rs | 223 ++++ purl/src/lib.rs | 554 +++++++++ purl/src/package_type.rs | 347 ++++++ purl/src/parse.rs | 475 ++++++++ purl/src/qualifiers.rs | 1023 +++++++++++++++++ purl/src/qualifiers/well_known.rs | 426 +++++++ purl/src/qualifiers/well_known/maven.rs | 6 + purl_test/Cargo.toml | 10 + purl_test/src/lib.rs | 742 ++++++++++++ rustfmt.toml | 15 + src/main.rs | 3 - xtask/Cargo.toml | 20 + xtask/src/generate_tests.rs | 161 +++ xtask/src/generate_tests/README.md | 2 + xtask/src/generate_tests/test-suite-data.json | 554 +++++++++ xtask/src/lib.rs | 1 + xtask/src/main.rs | 69 ++ 31 files changed, 5975 insertions(+), 133 deletions(-) create mode 100644 .cargo/config.toml create mode 100644 CODE_OF_CONDUCT.md create mode 100644 Cargo.lock create mode 100644 LICENSE create mode 100644 SECURITY.md create mode 100644 purl/Cargo.toml create mode 100644 purl/README.md create mode 100644 purl/src/builder.rs create mode 100644 purl/src/format.rs create mode 100644 purl/src/lib.rs create mode 100644 purl/src/package_type.rs create mode 100644 purl/src/parse.rs create mode 100644 purl/src/qualifiers.rs create mode 100644 purl/src/qualifiers/well_known.rs create mode 100644 purl/src/qualifiers/well_known/maven.rs create mode 100644 purl_test/Cargo.toml create mode 100644 purl_test/src/lib.rs create mode 100644 rustfmt.toml delete mode 100644 src/main.rs create mode 100644 xtask/Cargo.toml create mode 100644 
xtask/src/generate_tests.rs create mode 100644 xtask/src/generate_tests/README.md create mode 100644 xtask/src/generate_tests/test-suite-data.json create mode 100644 xtask/src/lib.rs create mode 100644 xtask/src/main.rs diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..35049cb --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[alias] +xtask = "run --package xtask --" diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 6c476bd..e38376a 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -12,8 +12,4 @@ A concise description of the new feature component. # Acceptance Criteria - [ ] Outputs XYZ when ... -- [ ] New endpoint ABC exists ... - -## Associated User Story -https://www.notion.so/phylum/... - +- [ ] New method ABC exists ... diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 658af40..36a70e2 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -6,7 +6,5 @@ Provide information on what this PR does and any context necessary to understand - [ ] Have you ensured that you have met the expected acceptance criteria? - [ ] Have you created sufficient tests? -Be sure to review additional expectations on the [developments standards page](https://www.notion.so/phylum/Development-Standards-and-Expectations-07f01c12f56b4bc099840d6074c92615#15d11ce9c55540cb90d604a4a178ee86). - # Issue What issue(s) does this PR close. Use the `closes #` here. 
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f9a5096..f6d388f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -11,121 +11,66 @@ on: env: CARGO_TERM_COLOR: always -# A baseline workflow that does `cargo fmt -- --check`, `cargo clippy --all-targets -- -D warnings`, and `cargo test` in parallel jobs -# Repos should append any additional CI/CD needs as neccessary -# Additional jobs like pushing up a docker image are provided but disabled by default via the "if" conditional and has other "# SET ME" things to set if enabled jobs: - format-check: - runs-on: amd64 - steps: - - uses: actions/checkout@v2 - - name: Install minimal stable rustfmt - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - components: rustfmt - - name: Format Check - #run cargo fmt to fix if in violation - run: cargo fmt -- --check - build: - runs-on: amd64 + check: + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Install minimal stable with clippy - uses: actions-rs/toolchain@v1 + + - name: Install nightly with rustfmt + uses: dtolnay/rust-toolchain@nightly + with: + components: rustfmt + - name: Install stable with clippy + uses: dtolnay/rust-toolchain@stable with: - profile: minimal - toolchain: stable - components: clippy - - name: Build - run: cargo build + components: clippy + - uses: Swatinem/rust-cache@v2 + + - name: Format Check + #run cargo fmt to fix if in violation + run: cargo +nightly fmt -- --check + - name: Clippy Linting - # --all-targets means apply to test code too. Without it just applies to application code - run: cargo clippy --all-targets -- -D warnings + # --all-targets means apply to test code too. 
Without it just applies to application code + run: cargo clippy --all-targets -p phylum-purl -- -D warnings + + - name: Regenerate test suite + run: cargo xtask codegen + - name: Ensure no changes to test suite + run: | + git diff --exit-code -- purl_test + + - name: Check docs + run: RUSTDOCFLAGS="-D warnings" cargo xtask doc -p phylum-purl --no-deps + test: - runs-on: amd64 + strategy: + matrix: + include: + - name: Minimal + runner: test + features: --no-default-features + - name: Default + runner: test + features: "" + - name: Full + runner: tarpaulin + features: --all-features + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Install minimal stable - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - # - name: Start Docker Compose - # run: docker-compose up -d - # - name: Check containers after spinup - # run: docker ps -a - - name: Run tests - run: cargo test --verbose - # - name: Stop containers - # if: ${{ always() }} - # run: docker-compose down - build_image: - # The type of runner that the job will run on - runs-on: amd64 - needs: [test, build, format-check] - if: ${{ false }} # Disable for the template, users should opt into enabling the job - #if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/development' || startsWith(github.ref, 'refs/tags/v') - # Steps represent a sequence of tasks that will be executed as part of the job - steps: - # Checks-out your repsitory under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v2 - with: - token: ${{secrets.BOT_SUBMODULE_TOKEN}} - submodules: recursive - - uses: webfactory/ssh-agent@v0.5.0 - with: - ssh-private-key: ${{ secrets.MACHINE_USER_SSH_KEY }} - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v3.x - - name: Setup Docker Buildx - uses: docker/setup-buildx-action@v1 - with: - install: true - driver-opts: network=host + - name: Install stable with clippy + uses: dtolnay/rust-toolchain@stable + 
- uses: Swatinem/rust-cache@v2 + - name: Install tarpaulin + if: ${{ matrix.runner == 'tarpaulin' }} + run: | + curl --proto '=https' --tlsv1.2 -sSfL https://github.com/xd009642/tarpaulin/releases/download/0.25.1/cargo-tarpaulin-x86_64-unknown-linux-gnu.tar.gz > tarpaulin.tar.gz + printf 'd3f687ffc0c30ee1e7e5ea63f58b4fb770ce38f6d97d1afca340846ed783de85 tarpaulin.tar.gz' | sha256sum -c || exit 1 + mkdir -p ~/.cargo/bin + tar -xzf tarpaulin.tar.gz -C ~/.cargo/bin + rm tarpaulin.tar.gz - - name: Login to Harbor - uses: docker/login-action@v1 - with: - registry: harbor.prod-aws.phylum.dev - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_TOKEN }} - - name: Build and push Docker images - # You may pin to the exact commit or the version. - # uses: docker/build-push-action@4a531fa5a603bab87dfa56578bd82b28508c9547 - uses: docker/build-push-action@v2.2.2 - with: - # Build's context is the set of files located in the specified PATH or URL - context: # SET ME - # Path to the Dockerfile - file: # optional - # List of build-time variables - build-args: - # List of metadata for an image - labels: # optional - # List of tags - tags: # SET ME harbor.prod-aws.phylum.dev/phylum/NAME_HERE:${{ env.GITHUB_REF_SLUG }} - # Always attempt to pull a newer version of the image - pull: true - # Sets the target stage to build - target: # optional - # Do not use cache when building the image - no-cache: # optional, default is false - # List of target platforms for build - platforms: # optional - # Load is a shorthand for --output=type=docker - load: # optional, default is false - # Push is a shorthand for --output=type=registry - push: true - # List of output destinations (format: type=local,dest=path) - outputs: # optional - # List of external cache sources for buildx (eg. 
user/app:cache, type=local,src=path/to/dir) - cache-from: type=registry,ref=harbor.prod-aws.phylum.dev/phylum/phylum-janusgraph:${{ env.GITHUB_REF_SLUG }} - # List of cache export destinations for buildx (eg. user/app:cache, type=local,dest=path/to/dir) - cache-to: type=inline - # List of secrets to expose to the build (eg. key=value, GIT_AUTH_TOKEN=mytoken) - secrets: # optional - # GitHub Token used to authenticate against a repository for Git context - github-token: # optional, default is ${{ github.token }} - ssh: default + - name: Run tests + run: cargo ${{ matrix.runner }} ${{ matrix.features }} diff --git a/.gitignore b/.gitignore index 2e1dcd7..b0f5c38 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,3 @@ -# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries -# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html -Cargo.lock - **/*.envrc # Generated by Cargo diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..d90b1da --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,132 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +[engineering@phylum.io](mailto:engineering@phylum.io). +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. 
Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][Mozilla CoC]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. + +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[Mozilla CoC]: https://github.com/mozilla/diversity +[FAQ]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..59f79c7 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,609 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "aho-corasick" +version = "0.7.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "342258dd14006105c2b75ab1bd7543a03bdf0cfc94383303ac212a04939dff6f" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-wincon", + "concolor-override", + "concolor-query", + "is-terminal", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23ea9e81bd02e310c216d080f6223c179012256e5151c41db88d12c88a1684d2" + +[[package]] +name = "anstyle-parse" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7d1bb534e9efed14f3e5f44e7dd1a4f709384023a4165199a4241e18dff0116" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-wincon" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3127af6145b149f3287bb9a0d10ad9c5692dba8c53ad48285e5bec4063834fa" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "cc" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" + +[[package]] +name = "clap" +version = "4.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "046ae530c528f252094e4a77886ee1374437744b2bff1497aa898bbddbbb29b3" +dependencies = [ + "clap_builder", + "clap_derive", + "once_cell", +] + +[[package]] +name = "clap_builder" +version = "4.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "223163f58c9a40c3b0a43e1c4b50a9ce09f007ea2cb1ec258a687945b4b7929f" +dependencies = [ + "anstream", + "anstyle", + "bitflags", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.13", +] + +[[package]] +name = "clap_lex" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" + +[[package]] +name = "concolor-override" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a855d4a1978dc52fb0536a04d384c2c0c1aa273597f08b77c8c4d3b2eec6037f" + +[[package]] +name = "concolor-query" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d11d52c3d7ca2e6d0040212be9e4dbbcd78b6447f535b6b561f449427944cf" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "errno" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d6a0976c999d473fe89ad888d5a284e55366d9dc9038b1ba2aa15128c4afa0" +dependencies = [ + "errno-dragonfly", + "libc", + "windows-sys", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "io-lifetimes" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09270fd4fa1111bc614ed2246c7ef56239a3063d5be0d1ec3b589c505d400aeb" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys", +] + +[[package]] +name = "is-terminal" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "256017f749ab3117e93acb91063009e1f1bb56d03965b14c2c8df4eb02c524d8" +dependencies = [ + "hermit-abi", + "io-lifetimes", + "rustix", + "windows-sys", +] + +[[package]] +name = "itoa" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.141" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" + +[[package]] +name = "linux-raw-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" + +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "once_cell" +version = "1.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" + +[[package]] +name = "percent-encoding" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" + +[[package]] +name = "phf" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c" +dependencies = [ + "phf_macros", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92aacdc5f16768709a569e913f7451034034178b05bdc8acda226659a3dccc66" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn 1.0.109", + "unicase", +] + +[[package]] +name = "phf_shared" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676" +dependencies = [ + "siphasher", + "unicase", +] + +[[package]] +name = "phylum-purl" +version = "0.1.0" +dependencies = 
[ + "hex", + "maplit", + "percent-encoding", + "phf", + "serde", + "smartstring", + "thiserror", + "unicase", +] + +[[package]] +name = "prettyplease" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ceca8aaf45b5c46ec7ed39fff75f57290368c1846d33d24a122ca81416ab058" +dependencies = [ + "proc-macro2", + "syn 2.0.13", +] + +[[package]] +name = "proc-macro2" +version = "1.0.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "purl_test" +version = "0.1.0" +dependencies = [ + "phylum-purl", +] + +[[package]] +name = "quote" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" + +[[package]] +name = "regex" +version = "1.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "rustix" +version = "0.37.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2aae838e49b3d63e9274e1c01833cc8139d3fec468c3b84688c628f44b1ae11d" +dependencies = [ + "bitflags", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "ryu" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" + +[[package]] +name = "serde" +version = "1.0.159" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c04e8343c3daeec41f58990b9d77068df31209f2af111e059e9fe9646693065" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.159" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c614d17805b093df4b147b51339e7e44bf05ef59fba1e45d83500bcfb4d8585" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.13", +] + +[[package]] +name = "serde_json" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "siphasher" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" + +[[package]] +name = "smartstring" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" +dependencies = [ + "autocfg", + "static_assertions", + "version_check", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c9da457c5285ac1f936ebd076af6dac17a61cfe7826f2076b4d015cf47bc8ec" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.13", +] + +[[package]] +name = "unicase" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" +dependencies = [ + "version_check", +] + +[[package]] +name = "unicode-ident" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" + +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "version_check" 
+version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "xtask" +version = "0.1.0" +dependencies = [ + "clap", + "convert_case", + "lazy_static", + "phylum-purl", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "serde", + "serde_json", + "syn 2.0.13", +] diff --git a/Cargo.toml b/Cargo.toml index c2b9b00..edc6932 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,8 +1,7 @@ -[package] -name = "repo-template" -version = "0.1.0" -edition = "2021" +[workspace] -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] +members = [ + "purl", + "purl_test", + "xtask", +] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f5172e9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Phylum + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 062a800..e1aaa3a 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,17 @@ -# Project Name +# PURL ## Introduction -## Manually Starting Integration Testing Environment +This is a Rust implementation of Package URL. See [purl/README.md](purl/README.md) for details. -## Running +## Formatting +This project uses some nightly rustfmt settings. You can run `cargo +nightly fmt` or `cargo xtask fmt` to format the code. + +## Testing + +The `purl_test` crate contains a test suite automatically generated from the official PURL test suite. It can be regenerated by running `cargo xtask codegen`. + +## Documentation + +The documentation can be generated by running either `cargo doc` or `cargo xtask doc`. `cargo xtask doc` uses the nightly rustdoc to generate output more similar to docs.rs. diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..544d35c --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,10 @@ +# Security Policy + +Phylum was founded by a team of security researchers at heart, and we take the security of our tooling seriously. + +## Reporting a Vulnerability + +We love coordinated disclosure! +Please email [security@phylum.io](mailto:security@phylum.io) to start a conversation! +We'll coordinate a secure communication mechanism first, then evaluate the reported issue(s) +and keep you apprised each step of the way. 
diff --git a/purl/Cargo.toml b/purl/Cargo.toml new file mode 100644 index 0000000..0fd5330 --- /dev/null +++ b/purl/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "phylum-purl" +version = "0.1.0" +edition = "2021" +description = "A Package URL implementation with customizable package types" +repository = "https://github.com/phylum-dev/purl/" +license = "MIT" +keywords = ["purl"] +categories = ["encoding", "parser-implementations"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[features] +default = ["package-type", "smartstring"] +package-type = ["phf", "unicase"] + +[dependencies] +hex = "0.4.3" +percent-encoding = "2.2.0" +phf = { version = "0.11.1", features = ["macros", "unicase"], optional = true } +serde = { version = "1.0.150", optional = true } +smartstring = { version = "1.0.1", optional = true } +thiserror = "1.0.37" +unicase = { version = "2.6.0", optional = true } + +[dev-dependencies] +maplit = "1.0.2" + +[package.metadata.docs.rs] +all-features = true +targets = [] +rustdoc-args = ["--cfg", "docsrs"] diff --git a/purl/README.md b/purl/README.md new file mode 100644 index 0000000..e04b73b --- /dev/null +++ b/purl/README.md @@ -0,0 +1,51 @@ +[PURL] parsing, manipulation, and formatting. + +A PURL is an identifier that refers to a software package. For example, +`pkg:cargo/phylum-purl` refers to this package. + +This library supports PURLs at two levels: + +1. The shape and format of a PURL is implemented by [`GenericPurl`]. It is possible to work with package-type-agnostic PURLs by using types like `GenericPurl`. (see also [package-url/purl-spec#38]) +2. The behavior of package types is implemented by [`PackageType`] and combined with [`GenericPurl`] by the type alias [`Purl`]. This implementation differs slightly from the PURL specification (see [`PackageType`] for details). 
It is possible to implement different package-type-specific behaviors or support for different package types by implementing the [`PurlShape`] trait. + +[PURL]: https://github.com/package-url/purl-spec +[package-url/purl-spec#38]: https://github.com/package-url/purl-spec/issues/38 + +# Example + +```rust +use std::str::FromStr; + +use phylum_purl::GenericPurl; + +# fn main() -> Result<(), Box> { +let purl = GenericPurl::::from_str( + "pkg:NPM/@acme/example@1.2.3?Checksum=sha256:\ + E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855", +)?; +assert_eq!("npm", purl.package_type()); +assert_eq!(Some("@acme"), purl.namespace()); +assert_eq!("example", purl.name()); +assert_eq!(Some("1.2.3"), purl.version()); +// Normalization is performed during parsing. +assert_eq!( + "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + purl.qualifiers()["checksum"], +); +assert_eq!( + "pkg:npm/%40acme/example@1.2.3?checksum=sha256:\ + e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + &purl.to_string(), +); + +let purl = purl.into_builder().without_version().without_qualifier("checksum").build()?; +assert_eq!("pkg:npm/%40acme/example", &purl.to_string(),); +# Ok(()) +# } +``` + +# Features + +- package-type: [`PackageType`] and related types +- serde: PURLs can be serialized and deserialized from strings +- smartstring: The smartstring crate is used to reduce heap allocations diff --git a/purl/src/builder.rs b/purl/src/builder.rs new file mode 100644 index 0000000..8f4295d --- /dev/null +++ b/purl/src/builder.rs @@ -0,0 +1,417 @@ +use crate::qualifiers::well_known::{Checksum, KnownQualifierKey}; +use crate::{GenericPurl, ParseError, PurlParts, PurlShape, SmallString}; + +/// A mutable, potentially invalid PURL. +/// +/// This type is used while parsing or constructing PURLs. +/// [`GenericPurlBuilder::build`] converts the builder into an immutable, +/// validated PURL. 
+/// +/// # Example +/// +/// ``` +/// // `PurlBuilder` is an alias for `GenericPurlBuilder`. +/// use phylum_purl::{PackageType, PurlBuilder}; +/// +/// // Use the builder if you want to set fields besides the type and name. +/// let purl = PurlBuilder::new(PackageType::Maven, "my-package") +/// .with_namespace("my.company") +/// .build() +/// .unwrap(); +/// +/// assert_eq!("pkg:maven/my.company/my-package", &purl.to_string()); +/// ``` +#[derive(Clone, Debug, Default)] +#[must_use] +pub struct GenericPurlBuilder { + /// The package type. + pub package_type: T, + /// The type-specific parts that make up the PURL. + pub parts: PurlParts, +} + +impl GenericPurlBuilder { + /// Initialize a new PURL builder. + pub fn new(package_type: T, name: S) -> Self + where + SmallString: From, + { + Self { + package_type, + parts: PurlParts { name: SmallString::from(name), ..Default::default() }, + } + } + + /// Set the package type. + pub fn with_package_type(mut self, new: T) -> Self { + self.package_type = new; + self + } + + /// Set the namespace. + /// + /// Passing `""` unsets the namespace. + pub fn with_namespace(mut self, new: S) -> Self + where + SmallString: From, + { + self.parts.namespace = SmallString::from(new); + self + } + + /// Unset the namespace. + /// + /// This is the same as passing `""` to [`Self::with_namespace`]. + pub fn without_namespace(mut self) -> Self { + self.parts.namespace = Default::default(); + self + } + + /// Set the name. + pub fn with_name(mut self, new: S) -> Self + where + SmallString: From, + { + self.parts.name = SmallString::from(new); + self + } + + /// Set the version. + /// + /// Passing `""` unsets the version. + pub fn with_version(mut self, new: S) -> Self + where + SmallString: From, + { + self.parts.version = SmallString::from(new); + self + } + + /// Unset the version. + /// + /// This is the same as passing `""` to [`Self::with_version`]. 
+ pub fn without_version(mut self) -> Self { + self.parts.version = Default::default(); + self + } + + /// Set a qualifier. + /// + /// If `v` is `""`, the qualifier will be unset. + pub fn with_qualifier(mut self, k: K, v: V) -> Result + where + K: AsRef, + SmallString: From + From, + { + self.parts.qualifiers.insert(k, v)?; + Ok(self) + } + + /// Set a qualifier. + /// + /// If `v` is `None`, the qualifier will be unset. + pub fn with_typed_qualifier(mut self, v: Option) -> Self + where + Q: KnownQualifierKey, + SmallString: From, + { + match v { + Some(v) => { + self.parts.qualifiers.insert_typed(v); + }, + None => { + self.parts.qualifiers.remove_typed::(); + }, + } + self + } + + /// Set a qualifier. + /// + /// If `v` is `None`, the qualifier will be unset. + pub fn try_with_typed_qualifier( + mut self, + v: Option, + ) -> Result>::Error> + where + Q: KnownQualifierKey, + SmallString: TryFrom, + { + match v { + Some(v) => { + self.parts.qualifiers.try_insert_typed(v)?; + }, + None => { + self.parts.qualifiers.remove_typed::(); + }, + } + Ok(self) + } + + /// Unset a qualifier. + /// + /// This is the same as passing `k, ""` to [`Self::with_qualifier`]. + pub fn without_qualifier(mut self, k: S) -> Self + where + S: AsRef, + SmallString: From, + { + self.parts.qualifiers.remove(k); + self + } + + /// Set the subpath. + /// + /// Passing `""` will unset the subpath. + pub fn with_subpath(mut self, new: S) -> Self + where + SmallString: From, + { + self.parts.subpath = SmallString::from(new); + self + } + + /// Unset the subpath. + /// + /// This is the same as passing `""` to [`Self::with_subpath`]. + pub fn without_subpath(mut self) -> Self { + self.parts.subpath = Default::default(); + self + } + + /// Convert the PURL builder into a PURL. + /// + /// The [`PurlShape::finish`] will be called on `T` to apply normalization + /// and validation rules. 
+ pub fn build(mut self) -> Result, ::Error> + where + T: PurlShape, + { + self.package_type.finish(&mut self.parts)?; + + if self.parts.name.is_empty() { + return Err(T::Error::from(ParseError::MissingRequiredField(crate::PurlField::Name))); + } + + // Empty qualifiers are the same as unset qualifiers. + self.parts.qualifiers.retain(|_, v| !v.is_empty()); + + if let Some(checksum) = self.parts.qualifiers.try_get_typed::()? { + // We can't just use `try_insert_typed` because we can't express to the borrow + // checker that `Checksum<'a>`'s immutable borrow of `self.parts.qualifiers` + // ends in the middle of `try_insert_typed` before the mutable borrow is + // required. + self.parts.qualifiers.insert(Checksum::KEY, SmallString::try_from(checksum)?)?; + } + + let GenericPurlBuilder { package_type, parts } = self; + + Ok(GenericPurl { package_type, parts }) + } +} + +#[cfg(test)] +mod tests { + use std::borrow::Cow; + + use maplit::hashmap; + + use super::*; + use crate::qualifiers::well_known::RepositoryUrl; + use crate::qualifiers::Qualifiers; + use crate::PurlField; + + #[test] + fn with_package_type_sets_type() { + let builder = GenericPurlBuilder { package_type: "old", parts: PurlParts::default() } + .with_package_type("new"); + assert_eq!("new", builder.package_type); + } + + #[test] + fn with_namespace_some_sets_namespace() { + let builder = GenericPurlBuilder::<&str>::default().with_namespace("new"); + assert_eq!("new", &builder.parts.namespace); + } + + #[test] + fn without_namespace_unsets_namespace() { + let builder = GenericPurlBuilder { + package_type: "", + parts: PurlParts { namespace: "old".into(), ..Default::default() }, + } + .without_namespace(); + assert_eq!("", &builder.parts.namespace); + } + + #[test] + fn with_name_sets_name() { + let builder = GenericPurlBuilder::<&str>::default().with_name("new"); + assert_eq!("new", &builder.parts.name); + } + + #[test] + fn with_version_some_sets_version() { + let builder = 
GenericPurlBuilder::<&str>::default().with_version("new"); + assert_eq!("new", &builder.parts.version); + } + + #[test] + fn without_version_unsets_version() { + let builder = GenericPurlBuilder { + package_type: "", + parts: PurlParts { version: "old".into(), ..Default::default() }, + } + .without_version(); + assert_eq!("", &builder.parts.version); + } + + #[test] + fn with_qualifier_with_new_valid_key_sets_qualifier() { + let builder = + GenericPurlBuilder { package_type: "", parts: PurlParts { ..Default::default() } } + .with_qualifier("ok", "value") + .unwrap(); + assert_eq!( + hashmap! { "ok" => "value" }, + builder.parts.qualifiers.iter().map(|(k, v)| (k.as_str(), v)).collect(), + ) + } + + #[test] + fn with_qualifier_with_new_invalid_key_returns_error() { + let result = + GenericPurlBuilder { package_type: "", parts: PurlParts { ..Default::default() } } + .with_qualifier("", ""); + assert!(matches!(result, Err(ParseError::InvalidQualifier))); + } + + #[test] + fn with_qualifier_with_existing_key_sets_qualifier() { + let builder = GenericPurlBuilder { + package_type: "", + parts: PurlParts { + qualifiers: Qualifiers::try_from_iter([("ok", "old")]).unwrap(), + ..Default::default() + }, + } + .with_qualifier("ok", "new") + .unwrap(); + assert_eq!( + hashmap! { "ok" => "new" }, + builder.parts.qualifiers.iter().map(|(k, v)| (k.as_str(), v)).collect(), + ) + } + + #[test] + fn with_typed_qualifier_with_new_key_and_some_value_sets_qualifier() { + let builder = + GenericPurlBuilder { package_type: "", parts: PurlParts { ..Default::default() } } + .with_typed_qualifier(Some(RepositoryUrl::from("example.com"))); + assert_eq!( + hashmap! 
{ RepositoryUrl::KEY => "example.com" }, + builder.parts.qualifiers.iter().map(|(k, v)| (k.as_str(), v)).collect(), + ) + } + + #[test] + fn with_typed_qualifier_with_existing_key_and_none_value_unsets_qualifier() { + let builder = GenericPurlBuilder { + package_type: "", + parts: PurlParts { + qualifiers: Qualifiers::try_from_iter([(RepositoryUrl::KEY, "example.com")]) + .unwrap(), + ..Default::default() + }, + } + .with_typed_qualifier(None::); + assert_eq!( + hashmap! {}, + builder.parts.qualifiers.iter().map(|(k, v)| (k.as_str(), v)).collect(), + ) + } + + #[test] + fn try_with_typed_qualifier_with_new_key_and_some_value_sets_qualifier() { + let builder = + GenericPurlBuilder { package_type: "", parts: PurlParts { ..Default::default() } } + .try_with_typed_qualifier(Some(RepositoryUrl::from("example.com"))) + .unwrap(); + assert_eq!( + hashmap! { RepositoryUrl::KEY => "example.com" }, + builder.parts.qualifiers.iter().map(|(k, v)| (k.as_str(), v)).collect(), + ) + } + + #[test] + fn try_with_typed_qualifier_with_existing_key_and_none_value_unsets_qualifier() { + let builder = GenericPurlBuilder { + package_type: "", + parts: PurlParts { + qualifiers: Qualifiers::try_from_iter([(RepositoryUrl::KEY, "example.com")]) + .unwrap(), + ..Default::default() + }, + } + .try_with_typed_qualifier(None::) + .unwrap(); + assert_eq!( + hashmap! {}, + builder.parts.qualifiers.iter().map(|(k, v)| (k.as_str(), v)).collect(), + ) + } + + #[test] + fn without_qualifier_with_existing_key_unsets_qualifier() { + let builder = GenericPurlBuilder { + package_type: "", + parts: PurlParts { + qualifiers: Qualifiers::try_from_iter([("ok", "old")]).unwrap(), + ..Default::default() + }, + } + .without_qualifier("ok"); + assert_eq!(hashmap! 
{}, builder.parts.qualifiers.iter().collect()) + } + + #[test] + fn with_subpath_some_sets_subpath() { + let builder = GenericPurlBuilder::<&str>::default().with_subpath("new"); + assert_eq!("new", &builder.parts.subpath); + } + + #[test] + fn without_subpath_unsets_subpath() { + let builder = GenericPurlBuilder { + package_type: "", + parts: PurlParts { subpath: "old".into(), ..Default::default() }, + } + .without_subpath(); + assert_eq!("", &builder.parts.subpath); + } + + #[test] + fn build_works() { + let purl = GenericPurlBuilder::default() + .with_package_type(Cow::Borrowed("type")) + .with_namespace("namespace") + .with_name("name") + .with_version("version") + .with_qualifier("key", "value") + .unwrap() + .with_subpath("subpath") + .build() + .expect("build failed"); + assert_eq!("type", purl.package_type().package_type()); + assert_eq!("name", purl.name()); + assert_eq!(Some("version"), purl.version()); + assert_eq!(Some("value"), purl.qualifiers().get("key")); + assert_eq!(Some("subpath"), purl.subpath()); + } + + #[test] + fn empty_package_name_is_invalid() { + let error = GenericPurl::new("type".to_owned(), "").unwrap_err(); + assert!(matches!(error, ParseError::MissingRequiredField(PurlField::Name))); + } +} diff --git a/purl/src/format.rs b/purl/src/format.rs new file mode 100644 index 0000000..c7ec2bc --- /dev/null +++ b/purl/src/format.rs @@ -0,0 +1,223 @@ +//! Support for converting a PURL to a string. 
+ +use std::fmt; + +use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS}; + +use crate::{is_valid_package_type, GenericPurl, PurlShape}; + +/// https://url.spec.whatwg.org/#fragment-percent-encode-set +const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`'); + +/// https://url.spec.whatwg.org/#query-percent-encode-set +const QUERY: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'#').add(b'<').add(b'>'); + +/// https://url.spec.whatwg.org/#path-percent-encode-set +const PATH: &AsciiSet = &QUERY.add(b'?').add(b'`').add(b'{').add(b'}'); + +// https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#how-to-build-purl-string-from-its-components +// We mostly use the standard URL rules, but the PURL spec says '@' '?' '#' must +// be escaped except when used as a separator. +const PURL_PATH: &AsciiSet = &PATH.add(b'@').add(b'?').add(b'#'); +const PURL_PATH_SEGMENT: &AsciiSet = &PURL_PATH.add(b'/'); +const PURL_QUERY: &AsciiSet = &QUERY.add(b'@').add(b'?').add(b'#'); +const PURL_FRAGMENT: &AsciiSet = &FRAGMENT.add(b'@').add(b'?').add(b'#'); + +impl fmt::Display for GenericPurl +where + T: PurlShape, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let package_type = self.package_type().package_type(); + + if !is_valid_package_type(&package_type) { + panic!("Invalid package type {:?}", &*package_type); + } + + write!( + f, + "pkg:{}/", + // The PURL spec says the type must _not_ be encoded. + // Technically, it is encoded, but we validated that it doesn't contain any characters + // that would require being encoded. + package_type, + )?; + + if let Some(namespace) = self.namespace() { + // The namespace is multiple path components. + write!(f, "{}/", utf8_percent_encode(namespace, PURL_PATH))?; + } + + // The name is only one path segment. 
+ write!(f, "{}", utf8_percent_encode(self.name(), PURL_PATH_SEGMENT))?; + + if let Some(version) = self.version() { + // The version is a continuation of the same path segment. + write!(f, "@{}", utf8_percent_encode(version, PURL_PATH_SEGMENT))?; + } + + if !self.parts.qualifiers.is_empty() { + let mut prefix = '?'; + for (k, v) in &self.parts.qualifiers { + write!( + f, + "{}{}={}", + prefix, + utf8_percent_encode(k, PURL_QUERY), + utf8_percent_encode(v, PURL_QUERY), + )?; + prefix = '&'; + } + } + + if let Some(subpath) = self.subpath() { + write!(f, "#{}", utf8_percent_encode(subpath, PURL_FRAGMENT))?; + } + + Ok(()) + } +} + +#[cfg(feature = "serde")] +mod ser { + use serde::Serialize; + + use super::*; + + impl Serialize for GenericPurl + where + T: PurlShape, + { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.collect_str(self) + } + } +} + +#[cfg(test)] +mod tests { + use std::borrow::Cow; + + use super::*; + use crate::{GenericPurlBuilder, ParseError, PurlParts}; + + #[test] + #[should_panic] + fn display_disallows_invalid_package_types() { + struct MyBadPackageType; + + // Properly implemented `PurlShape`s only return valid package types. + // Even the included string-based implementations validate the package type in + // the `finish` method. + impl PurlShape for MyBadPackageType { + type Error = ParseError; + + fn package_type(&self) -> Cow { + Cow::Borrowed("!") + } + + fn finish(&mut self, _parts: &mut PurlParts) -> Result<(), Self::Error> { + Ok(()) + } + } + + match GenericPurlBuilder::new(MyBadPackageType, "name").build() { + Ok(purl) => { + _ = purl.to_string(); + }, + Err(error) => { + // Don't use unwrap or the test will incorrectly pass if the purl cannot be + // built. 
+ eprintln!("Unexpected error: {}", error); + }, + } + } + + #[test] + fn display_encodes_namespace_correctly() { + assert_eq!( + "pkg:generic/a%23/b%3F/c%40/name", + &GenericPurlBuilder::new(Cow::Borrowed("generic"), "name") + .with_namespace("a#/b?/c@") + .build() + .expect("Could not build PURL") + .to_string(), + ); + } + + #[test] + fn display_encodes_name_correctly() { + assert_eq!( + "pkg:generic/a%23%2Fb%3F%2Fc%40", + &GenericPurlBuilder::new(Cow::Borrowed("generic"), "a#/b?/c@") + .build() + .expect("Could not build PURL") + .to_string(), + ); + } + + #[test] + fn display_encodes_version_correctly() { + assert_eq!( + "pkg:generic/name@a%23%2Fb%3F%2Fc%40", + &GenericPurlBuilder::new(Cow::Borrowed("generic"), "name") + .with_version("a#/b?/c@") + .build() + .expect("Could not build PURL") + .to_string(), + ); + } + + #[test] + fn display_encodes_qualifiers_correctly() { + assert_eq!( + "pkg:generic/name?a=%23&b=%3F&c=%40", + &GenericPurlBuilder::new(Cow::Borrowed("generic"), "name") + .with_qualifier("a", "#") + .expect("Could not set qualifier a") + .with_qualifier("b", "?") + .expect("Could not set qualifier b") + .with_qualifier("c", "@") + .expect("Could not set qualifier c") + .build() + .expect("Could not build PURL") + .to_string(), + ); + } + + #[test] + fn display_encodes_subpath_correctly() { + assert_eq!( + "pkg:generic/name#a%23/b%3F/c%40", + &GenericPurlBuilder::new(Cow::Borrowed("generic"), "name") + .with_subpath("a#/b?/c@") + .build() + .expect("Could not build PURL") + .to_string(), + ); + } + + #[cfg(feature = "serde")] + #[test] + fn serialize_serializes_correctly() { + use std::fmt::Display; + + use serde::Serialize; + + struct SerializeToFormatter(GenericPurl); + + impl Display for SerializeToFormatter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.serialize(f).unwrap(); + Ok(()) + } + } + + let serialized = + SerializeToFormatter(GenericPurl::new("type".to_owned(), "name").unwrap()).to_string(); + 
assert_eq!("pkg:type/name", &serialized); + } +} diff --git a/purl/src/lib.rs b/purl/src/lib.rs new file mode 100644 index 0000000..a882b2b --- /dev/null +++ b/purl/src/lib.rs @@ -0,0 +1,554 @@ +#![doc = include_str!("../README.md")] +#![cfg_attr(docsrs, feature(doc_auto_cfg))] + +use std::borrow::Cow; + +pub use builder::*; +pub use format::*; +#[cfg(feature = "package-type")] +pub use package_type::*; +pub use parse::*; +pub use qualifiers::Qualifiers; +use smartstring::{LazyCompact, SmartString, SmartStringMode}; + +mod builder; +mod format; +#[cfg(feature = "package-type")] +mod package_type; +mod parse; +pub mod qualifiers; + +/// A string that may be stored inline instead of on the heap. +/// +/// PURLs may contain many small strings so this saves on heap allocations. +// This needs to be public because it gets exposed in some methods of Qualifiers. +#[cfg(feature = "smartstring")] +pub type SmallString = SmartString; +// When compiling without smartstring we'll just use regular Strings. +#[cfg(not(feature = "smartstring"))] +type SmallString = String; + +/// A type that provides package-type-specific behavior. +/// +/// If it supports your requirements, you can use or extend [`PackageType`]. +/// (see also [`Purl`]) +/// +/// If you don't care about package-type-specific behavior, you can use +/// [`String`], [`Cow`], or [`SmallString`]. 
+/// +/// # Example +/// +/// ``` +/// use std::borrow::Cow; +/// use std::str::FromStr; +/// +/// use phylum_purl::{GenericPurl, GenericPurlBuilder, ParseError, PurlParts, PurlShape}; +/// +/// enum MyPackageType { +/// Custom, +/// } +/// +/// #[derive(Debug, thiserror::Error)] +/// enum MyError { +/// #[error("Parse error: {0}")] +/// Parse(#[from] ParseError), +/// #[error("Unsupported package type")] +/// UnsupportedType, +/// #[error("Required repository_url qualifier was not found")] +/// MissingRepositoryUrl, +/// } +/// +/// impl FromStr for MyPackageType { +/// type Err = MyError; +/// +/// fn from_str(s: &str) -> Result { +/// if s.eq_ignore_ascii_case("custom") { +/// Ok(MyPackageType::Custom) +/// } else { +/// Err(MyError::UnsupportedType) +/// } +/// } +/// } +/// +/// impl PurlShape for MyPackageType { +/// type Error = MyError; +/// +/// fn package_type(&self) -> Cow { +/// match self { +/// // Always use lower case types here. +/// // Upper case characters are not invalid, but the canonical type name is always +/// // lower case. +/// MyPackageType::Custom => Cow::Borrowed("custom"), +/// } +/// } +/// +/// fn finish(&mut self, parts: &mut PurlParts) -> Result<(), Self::Error> { +/// match self { +/// MyPackageType::Custom => { +/// // pkg:custom names are always lower case. +/// parts.name = parts.name.to_lowercase().into(); +/// // pkg:custom requires a repository_url. 
+/// if !parts.qualifiers.contains_key("repository_url") { +/// return Err(MyError::MissingRepositoryUrl); +/// } +/// }, +/// } +/// Ok(()) +/// } +/// } +/// +/// type Purl = GenericPurl; +/// type PurlBuilder = GenericPurlBuilder; +/// +/// assert!(matches!( +/// Purl::from_str("pkg:custom/Example?repository_url=https://example.com/") +/// .map(|p| p.to_string()) +/// .as_deref(), +/// Ok("pkg:custom/example?repository_url=https://example.com/"), +/// )); +/// assert!(matches!(Purl::from_str("pkg:custom/Example"), Err(MyError::MissingRepositoryUrl),)); +/// ``` +pub trait PurlShape { + /// The type of error returned by this package type. + type Error: From; + + /// Get the string representation of this `PurlShape`. + /// + /// The returned value should be a lower case string. If the returned value + /// contains invalid characters, `Display` and `to_string` will panic. + #[must_use] + fn package_type(&self) -> Cow; + + /// Preview and potentially modify the parts that make up a PURL. + /// + /// This is called when a [`GenericPurl`] is being created. It gives the + /// `PurlShape` implementation a chance to perform validation and + /// normalization. + fn finish(&mut self, parts: &mut PurlParts) -> Result<(), Self::Error>; +} + +/// A generic [`PurlShape`] that can support any package type but does not +/// provide any type-specific functionality. +/// +/// Without type-specific functionality, it's possible to create PURLs that have +/// incorrect capitalization or are missing a required namespace or required +/// qualifiers. +impl PurlShape for String { + type Error = ParseError; + + fn package_type(&self) -> Cow { + Cow::Borrowed(self) + } + + fn finish(&mut self, _parts: &mut PurlParts) -> Result<(), Self::Error> { + str_preview_mut(self)?; + Ok(()) + } +} + +/// A generic [`PurlShape`] that can support any package type but does not +/// provide any type-specific functionality. 
+/// +/// Without type-specific functionality, it's possible to create PURLs that have +/// incorrect capitalization or are missing a required namespace or required +/// qualifiers. +impl<'a> PurlShape for Cow<'a, str> { + type Error = ParseError; + + fn package_type(&self) -> Cow { + Cow::Borrowed(self) + } + + fn finish(&mut self, _parts: &mut PurlParts) -> Result<(), Self::Error> { + match self { + Cow::Owned(v) => str_preview_mut(v)?, + Cow::Borrowed(v) => { + if !is_valid_package_type(v) { + return Err(ParseError::InvalidPackageType); + } + if !v.chars().all(|c| c.is_ascii_lowercase()) { + *self = Cow::Owned(v.to_ascii_lowercase()) + } + }, + } + Ok(()) + } +} + +/// A generic [`PurlShape`] that can support any package type but does not +/// provide any type-specific functionality. +/// +/// Without type-specific functionality, it's possible to create PURLs that have +/// incorrect capitalization or are missing a required namespace or required +/// qualifiers. +impl PurlShape for SmartString +where + M: SmartStringMode, +{ + type Error = ParseError; + + fn package_type(&self) -> Cow { + Cow::Borrowed(self) + } + + fn finish(&mut self, _parts: &mut PurlParts) -> Result<(), Self::Error> { + str_preview_mut(self)?; + Ok(()) + } +} + +fn str_preview_mut(s: &mut str) -> Result<(), ParseError> { + if !is_valid_package_type(s) { + return Err(ParseError::InvalidPackageType); + } + s.make_ascii_lowercase(); + Ok(()) +} + +/// The parts that make up a PURL, minus the package type. +#[derive(Clone, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord)] +#[must_use] +pub struct PurlParts { + /// The namespace. + pub namespace: SmallString, + /// The name. + pub name: SmallString, + /// The version. + pub version: SmallString, + /// The qualifiers. + pub qualifiers: Qualifiers, + /// The subpath. + pub subpath: SmallString, +} + +/// An immutable PURL. +/// +/// This type does not directly include any package-type-specific behavior. 
Any +/// package-type-specific behavior is added using [`PurlShape`]. +/// +/// # Example +/// +/// ``` +/// // `Purl` is an alias for `GenericPurl`. +/// use phylum_purl::{PackageType, Purl}; +/// +/// // Use the builder if you want to set fields besides the type and name. +/// let purl = Purl::builder(PackageType::Npm, "my-package").with_version("1.2.3").build().unwrap(); +/// +/// assert_eq!("pkg:npm/my-package@1.2.3", &purl.to_string()); +/// ``` +/// +/// # See also +/// +/// See [`Purl`] for information about using the built-in [`PackageType`] enum. +/// +/// See [`PurlShape`] if you want to use your own package types. +#[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] +#[must_use] +pub struct GenericPurl { + package_type: T, + parts: PurlParts, +} + +impl GenericPurl { + /// Create a new [`PurlBuilder`]. + pub fn builder(package_type: T, name: S) -> GenericPurlBuilder + where + SmallString: From, + T: PurlShape, + { + GenericPurlBuilder::new(package_type, name) + } + + /// Create a new PURL. + /// + /// An error will be returned if the [`PurlShape`] implementation `T` + /// requires additional fields to be specified for `package_type`. For + /// example, `Purl::new(PackageType::Maven, "my-package")` will fail because + /// Maven requires a namespace. In that case, you must use [`Self::builder`] + /// to set the additional required fields. + pub fn new(package_type: T, name: S) -> Result + where + SmallString: From, + T: PurlShape, + { + Self::builder(package_type, name).build() + } + + /// Get the package type. + #[must_use] + pub fn package_type(&self) -> &T { + &self.package_type + } + + /// Get the namespace. + #[must_use] + pub fn namespace(&self) -> Option<&str> { + Some(&*self.parts.namespace).filter(|v| !v.is_empty()) + } + + /// Get the name. + #[must_use] + pub fn name(&self) -> &str { + &self.parts.name + } + + /// Get the version. 
+ #[must_use] + pub fn version(&self) -> Option<&str> { + Some(&*self.parts.version).filter(|v| !v.is_empty()) + } + + /// Get the qualifiers. + #[must_use] + pub fn qualifiers(&self) -> &Qualifiers { + &self.parts.qualifiers + } + + /// Get the subpath. + #[must_use] + pub fn subpath(&self) -> Option<&str> { + Some(&*self.parts.subpath).filter(|v| !v.is_empty()) + } + + /// Convert this PURL into a mutable form. + pub fn into_builder(self) -> GenericPurlBuilder { + let GenericPurl { package_type, parts } = self; + GenericPurlBuilder { package_type, parts } + } +} + +/// Check whether a package type string is valid according to the rules of the +/// PURL spec. +#[must_use] +fn is_valid_package_type(package_type: &str) -> bool { + // https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#rules-for-each-purl-component + const ALLOWED_SPECIAL_CHARS: &[char] = &['.', '+', '-']; + !package_type.is_empty() + && package_type + .chars() + .all(|c| c.is_ascii_alphanumeric() || ALLOWED_SPECIAL_CHARS.contains(&c)) +} + +/// Try to convert a `SmallString` to lowercase without allocating. +fn lowercase_in_place(s: &mut SmallString) { + enum State { + Lower, + MixedAscii, + MixedUnicode, + } + let mut state = State::Lower; + for c in s.chars() { + if c.is_uppercase() { + if c.is_ascii() { + state = State::MixedAscii; + } else { + state = State::MixedUnicode; + break; + } + } + } + match state { + State::Lower => {}, + State::MixedAscii => { + s.make_ascii_lowercase(); + }, + State::MixedUnicode => { + *s = s.chars().flat_map(|c| c.to_lowercase()).collect(); + }, + } +} + +/// Try to convert a `&str` to a lowercase `SmallString` without allocating. 
+fn copy_as_lowercase(s: &str) -> SmallString { + enum State { + Lower, + MixedAscii, + MixedUnicode, + } + let mut state = State::Lower; + for c in s.chars() { + if c.is_uppercase() { + if c.is_ascii() { + state = State::MixedAscii; + } else { + state = State::MixedUnicode; + break; + } + } + } + match state { + State::Lower => SmallString::from(s), + State::MixedAscii => { + let mut v = SmallString::from(s); + v.make_ascii_lowercase(); + v + }, + State::MixedUnicode => s.chars().flat_map(|c| c.to_lowercase()).collect(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn string_shape_converts_to_lower() { + let purl = GenericPurlBuilder::new("TEST".to_owned(), "name") + .build() + .expect("Could not build PURL"); + assert_eq!("test", purl.package_type()); + assert_eq!("pkg:test/name", &purl.to_string()); + } + + #[test] + fn string_shape_disallows_invalid_names() { + let error = GenericPurlBuilder::new("!".to_owned(), "name") + .build() + .expect_err("Build with invalid type should have failed"); + assert!(matches!(error, ParseError::InvalidPackageType), "Got unexpected error {}", error,); + } + + #[test] + fn cow_shape_borrowed_converts_to_lower() { + let purl = GenericPurlBuilder::new(Cow::Borrowed("TEST"), "name") + .build() + .expect("Could not build PURL"); + assert_eq!("test", purl.package_type()); + assert_eq!("pkg:test/name", &purl.to_string()); + } + + #[test] + fn cow_shape_owned_converts_to_lower() { + let purl = GenericPurlBuilder::new(Cow::Owned("TEST".to_owned()), "name") + .build() + .expect("Could not build PURL"); + assert_eq!("test", purl.package_type()); + assert_eq!("pkg:test/name", &purl.to_string()); + } + + #[test] + fn cow_shape_does_not_clone_lower() { + let original = "test"; + let purl = GenericPurlBuilder::new(Cow::Borrowed(original), "name") + .build() + .expect("Could not build PURL"); + assert_eq!(original.as_ptr(), purl.package_type.as_ptr()); + } + + #[test] + fn cow_shape_does_not_clone_owned() { + let original = 
"TEST".to_owned(); + let original_ptr = original.as_ptr(); + let purl = GenericPurlBuilder::new(Cow::Owned(original), "name") + .build() + .expect("Could not build PURL"); + assert_eq!("test", purl.package_type()); + assert_eq!(original_ptr, purl.package_type.as_ptr()); + } + + #[test] + fn cow_shape_disallows_invalid_names() { + let error = GenericPurlBuilder::new(Cow::Borrowed("!"), "name") + .build() + .expect_err("Build with invalid type should have failed"); + assert!(matches!(error, ParseError::InvalidPackageType), "Got unexpected error {}", error,); + } + + #[test] + fn smallstring_shape_converts_to_lower() { + let purl = GenericPurlBuilder::new(SmallString::from("TEST"), "name") + .build() + .expect("Could not build PURL"); + assert_eq!("test", purl.package_type()); + assert_eq!("pkg:test/name", &purl.to_string()); + } + + #[test] + fn smallstring_shape_disallows_invalid_names() { + let error = GenericPurlBuilder::new(SmallString::from("!"), "name") + .build() + .expect_err("Build with invalid type should have failed"); + assert!(matches!(error, ParseError::InvalidPackageType), "Got unexpected error {}", error,); + } + + #[test] + fn into_builder_build_produces_same_purl() { + let original = GenericPurlBuilder::new(Cow::Borrowed("type"), "name") + .with_namespace("namespace") + .with_subpath("subpath") + .with_version("1.0") + .with_qualifier("key", "value") + .unwrap() + .build() + .unwrap(); + let round_trip = original.clone().into_builder().build().unwrap(); + assert_eq!(original, round_trip); + } + + #[test] + fn lowercase_in_place_when_lower_does_nothing() { + let mut lower = SmallString::from("a"); + lowercase_in_place(&mut lower); + assert_eq!("a", &lower); + } + + #[test] + fn lowercase_in_place_when_upper_ascii_lowercases() { + let mut lower = SmallString::from("A"); + lowercase_in_place(&mut lower); + assert_eq!("a", &lower); + } + + #[test] + fn lowercase_in_place_when_upper_unicode_lowercases() { + let mut lower = SmallString::from("Æ"); + 
lowercase_in_place(&mut lower); + assert_eq!("æ", &lower); + } + + #[test] + fn copy_as_lowercase_when_lower_does_nothing() { + let upper = "a"; + let lower = copy_as_lowercase(upper); + assert_eq!("a", &lower); + } + + #[test] + fn copy_as_lowercase_when_upper_ascii_lowercases() { + let upper = "A"; + let lower = copy_as_lowercase(upper); + assert_eq!("a", &lower); + } + + #[test] + fn copy_as_lowercase_when_upper_unicode_lowercases() { + let upper = "Æ"; + let lower = copy_as_lowercase(upper); + assert_eq!("æ", &lower); + } + + #[test] + fn empty_package_type_is_invalid() { + let error = GenericPurl::new(Cow::Borrowed(""), "name").unwrap_err(); + assert!(matches!(error, ParseError::InvalidPackageType)); + } + + #[test] + fn namespace_when_empty_is_none() { + let purl = GenericPurl::new(Cow::Borrowed("type"), "name").unwrap(); + assert_eq!(None, purl.namespace()); + } + + #[test] + fn version_when_empty_is_none() { + let purl = GenericPurl::new(Cow::Borrowed("type"), "name").unwrap(); + assert_eq!(None, purl.version()); + } + + #[test] + fn subpath_when_empty_is_none() { + let purl = GenericPurl::new(Cow::Borrowed("type"), "name").unwrap(); + assert_eq!(None, purl.subpath()); + } +} diff --git a/purl/src/package_type.rs b/purl/src/package_type.rs new file mode 100644 index 0000000..eb0e312 --- /dev/null +++ b/purl/src/package_type.rs @@ -0,0 +1,347 @@ +//! Support for known package types. + +use std::borrow::Cow; +use std::str::FromStr; + +use phf::phf_map; +use unicase::UniCase; + +use crate::{ + lowercase_in_place, GenericPurl, GenericPurlBuilder, ParseError, PurlField, PurlShape, + SmallString, +}; + +/// A PURL that supports the known package types. +/// +/// The spec lists many types, and some of the types may not be correct or fully +/// described. Rather than implementing an exhaustive list and potentially +/// claiming to support something that is incorrectly implemented, only some +/// types are supported here. 
(see also [package-url/purl-spec#38]) +/// +/// If you need additional types or different behavior, you can provide your own +/// [`PurlShape`] implementation. +/// +/// [package-url/purl-spec#38]: https://github.com/package-url/purl-spec/issues/38 +/// +/// # Differences compared to current PURL spec +/// +/// - The PURL spec says that NuGet package names are case sensitive, but this +/// implementation converts them to lowercase. This is consistent with the +/// behavior of NuGet, which requires clients to convert package names to +/// lowercase before calling the v3 package API. ([package-url/purl-spec#226]) +/// - The PURL spec's implementation of Python package name normalization is +/// incomplete. In addition to converting underscores to dashes, periods are +/// also converted to dashes, and consecutive separators are combined +/// into a single dash. This implementation matches the Python behavior. +/// ([package-url/purl-spec#165]) +/// - The PURL spec says that NPM package names are case insensitive, but this +/// implementation does not convert them to lowercase. *New* NPM packages must +/// have lowercase names, but there are already NPM packages in existence with +/// uppercase names and those packages are distinct from other packages that +/// have the same name in lowercase. ([package-url/purl-spec#136]) +/// +/// [package-url/purl-spec#226]: https://github.com/package-url/purl-spec/issues/226 +/// [package-url/purl-spec#165]: https://github.com/package-url/purl-spec/pull/165 +/// [package-url/purl-spec#136]: https://github.com/package-url/purl-spec/issues/136 +/// +/// # Extending `PackageType` +/// +/// If you want to extend `PackageType` with support for another package type, +/// you can do so via delegation. 
+/// +/// ``` +/// use std::borrow::Cow; +/// use std::str::FromStr; +/// +/// use phylum_purl::{ +/// GenericPurl, GenericPurlBuilder, PackageError, PackageType, PurlParts, PurlShape, +/// UnsupportedPackageType, +/// }; +/// +/// #[derive(Clone, Copy)] +/// enum MyPackageType { +/// PackageType(PackageType), +/// Custom, +/// } +/// +/// type Purl = GenericPurl; +/// +/// impl FromStr for MyPackageType { +/// type Err = UnsupportedPackageType; +/// +/// fn from_str(s: &str) -> Result { +/// // Always try your types first. +/// // Otherwise there may be unexpected behavioral changes if PackageType starts +/// // supporting your types. +/// if s.eq_ignore_ascii_case("custom") { +/// Ok(MyPackageType::Custom) +/// } else { +/// PackageType::from_str(s).map(MyPackageType::PackageType) +/// } +/// } +/// } +/// +/// impl PurlShape for MyPackageType { +/// type Error = PackageError; +/// +/// fn package_type(&self) -> Cow { +/// match self { +/// MyPackageType::PackageType(t) => t.package_type(), +/// MyPackageType::Custom => Cow::Borrowed("custom"), +/// } +/// } +/// +/// fn finish(&mut self, parts: &mut PurlParts) -> Result<(), Self::Error> { +/// match self { +/// MyPackageType::PackageType(t) => t.finish(parts), +/// MyPackageType::Custom => { +/// // your logic here +/// Ok(()) +/// }, +/// } +/// } +/// } +/// +/// assert!(matches!( +/// Purl::from_str("pkg:custom/example").unwrap().package_type(), +/// MyPackageType::Custom, +/// )); +/// ``` +pub type Purl = GenericPurl; + +/// A PURL builder that supports the known package types. +pub type PurlBuilder = GenericPurlBuilder; + +/// The known package types. +/// +/// This is a subset of the types described in the PURL spec repository. See +/// [`Purl`] for details. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +#[non_exhaustive] +pub enum PackageType { + Cargo, + Gem, + Golang, + Maven, + Npm, + NuGet, + PyPI, +} + +static PACKAGE_TYPES: phf::Map, PackageType> = phf_map! 
{ + UniCase::ascii("cargo") => PackageType::Cargo, + UniCase::ascii("gem") => PackageType::Gem, + UniCase::ascii("golang") => PackageType::Golang, + UniCase::ascii("maven") => PackageType::Maven, + UniCase::ascii("npm") => PackageType::Npm, + UniCase::ascii("nuget") => PackageType::NuGet, + UniCase::ascii("pypi") => PackageType::PyPI, +}; + +impl PackageType { + /// Get the name of the package type as a `&'static str`. + #[must_use] + pub const fn name(&self) -> &'static str { + match self { + PackageType::Cargo => "cargo", + PackageType::Gem => "gem", + PackageType::Golang => "golang", + PackageType::Maven => "maven", + PackageType::Npm => "npm", + PackageType::NuGet => "nuget", + PackageType::PyPI => "pypi", + } + } +} + +impl From for &'static str { + fn from(value: PackageType) -> Self { + value.name() + } +} + +impl AsRef for PackageType { + fn as_ref(&self) -> &str { + self.name() + } +} + +/// An error that is returned when an unsupported package type is used. +#[derive(Debug, thiserror::Error)] +#[error("Unsupported package type")] +pub struct UnsupportedPackageType; + +impl FromStr for PackageType { + type Err = UnsupportedPackageType; + + fn from_str(s: &str) -> Result { + PACKAGE_TYPES.get(&UniCase::new(s)).copied().ok_or(UnsupportedPackageType) + } +} + +/// An error that is returned when working with [`PackageType`]. +#[derive(Debug, thiserror::Error)] +pub enum PackageError { + /// One or more required field is missing. + /// + /// # Example + /// + /// ``` + /// use std::str::FromStr; + /// + /// use phylum_purl::{PackageError, Purl, PurlField}; + /// + /// assert!(matches!( + /// Purl::from_str("pkg:golang/name@version"), + /// Err(PackageError::MissingRequiredField(PurlField::Namespace)), + /// )); + /// ``` + #[error("The {0} field must be present")] + MissingRequiredField(PurlField), + /// The PURL could not be parsed. + #[error("{0}")] + Parse(#[from] ParseError), + /// The package type is unsupported. 
+ #[error("Unsupported package type")] + UnsupportedType, +} + +impl From for PackageError { + fn from(_: UnsupportedPackageType) -> Self { + PackageError::UnsupportedType + } +} + +impl PurlShape for PackageType { + type Error = PackageError; + + fn package_type(&self) -> Cow { + self.name().into() + } + + fn finish(&mut self, parts: &mut crate::PurlParts) -> Result<(), Self::Error> { + match self { + PackageType::Cargo | PackageType::Gem | PackageType::Npm => {}, + PackageType::Golang => { + if parts.namespace.is_empty() { + return Err(PackageError::MissingRequiredField(PurlField::Namespace)); + } + lowercase_in_place(&mut parts.namespace); + lowercase_in_place(&mut parts.name); + }, + PackageType::Maven => { + if parts.namespace.is_empty() { + return Err(PackageError::MissingRequiredField(PurlField::Namespace)); + } + }, + PackageType::NuGet => { + lowercase_in_place(&mut parts.name); + }, + PackageType::PyPI => { + fix_pypi_name(&mut parts.name); + }, + } + Ok(()) + } +} + +fn fix_pypi_name(name: &mut SmallString) { + // https://packaging.python.org/en/latest/specifications/name-normalization/#name-normalization + // Replace runs of consecutive ".-_" characters with a single "-". 
+ const DASH_CHARACTERS: &[char] = &['-', '_', '.']; + if name.contains(DASH_CHARACTERS) { + let mut result = SmallString::new(); + let mut in_dash = false; + for c in name.chars() { + if DASH_CHARACTERS.contains(&c) { + if !in_dash { + result.push('-'); + in_dash = true; + } + } else { + in_dash = false; + result.extend(c.to_lowercase()); + } + } + *name = result; + } else { + lowercase_in_place(name) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn nuget_lowercases_names() { + let purl = Purl::new(PackageType::NuGet, "Newtonsoft.Json").unwrap(); + assert_eq!("pkg:nuget/newtonsoft.json", &purl.to_string()); + } + + #[test] + fn pypi_lowercases_names() { + let purl = Purl::new(PackageType::PyPI, "PyTest").unwrap(); + assert_eq!("pkg:pypi/pytest", &purl.to_string()); + } + + #[test] + fn fix_pypi_name_with_leading() { + let mut name = SmallString::from("_-.-_leading"); + fix_pypi_name(&mut name); + assert_eq!("-leading", &name); + } + + #[test] + fn fix_pypi_name_with_inner() { + let mut name = SmallString::from("inner_-.-_inner"); + fix_pypi_name(&mut name); + assert_eq!("inner-inner", &name); + } + + #[test] + fn fix_pypi_name_with_trailing() { + let mut name = SmallString::from("trailing_-.-_"); + fix_pypi_name(&mut name); + assert_eq!("trailing-", &name); + } + + #[test] + fn cargo_does_not_lowercase_names() { + let purl = Purl::new(PackageType::Cargo, "Inflector").unwrap(); + assert_eq!("pkg:cargo/Inflector", &purl.to_string()); + } + + #[test] + fn npm_does_not_lowercase_names() { + let purl = Purl::new(PackageType::Npm, "parseUri").unwrap(); + assert_eq!("pkg:npm/parseUri", &purl.to_string()); + } + + #[test] + fn maven_requires_namespace() { + let error = Purl::new(PackageType::Maven, "invalid").unwrap_err(); + assert!( + matches!(error, PackageError::MissingRequiredField(PurlField::Namespace)), + "Expected missing namespace error but got {error}", + ); + } + + #[test] + fn golang_requires_namespace() { + let error = 
Purl::new(PackageType::Golang, "invalid").unwrap_err(); + assert!( + matches!(error, PackageError::MissingRequiredField(PurlField::Namespace)), + "Expected missing namespace error but got {error}", + ); + } + + #[test] + fn golang_lowercases_names() { + let purl = Purl::builder(PackageType::Golang, "Cobra") + .with_namespace("GitHub.com/SPF13") + .build() + .unwrap(); + assert_eq!("pkg:golang/github.com/spf13/cobra", &purl.to_string()); + } +} diff --git a/purl/src/parse.rs b/purl/src/parse.rs new file mode 100644 index 0000000..17c108c --- /dev/null +++ b/purl/src/parse.rs @@ -0,0 +1,475 @@ +//! Support for parsing PURLs. + +use std::borrow::Cow; +use std::convert::Infallible; +use std::fmt::{self, Write}; +use std::str::FromStr; + +use percent_encoding::percent_decode_str; + +use crate::qualifiers::Entry; +use crate::{ + is_valid_package_type, GenericPurl, GenericPurlBuilder, PurlParts, PurlShape, SmallString, +}; + +/// An error returned when trying to parse an invalid PURL. +#[derive(Debug, thiserror::Error)] +pub enum ParseError { + /// The URL scheme is not pkg. + /// + /// # Example + /// + /// ``` + /// use std::str::FromStr; + /// + /// use phylum_purl::{GenericPurl, ParseError}; + /// + /// assert!(matches!( + /// GenericPurl::::from_str("http://example.com"), + /// Err(ParseError::UnsupportedUrlScheme), + /// )); + /// ``` + #[error("URL scheme must be pkg")] + UnsupportedUrlScheme, + /// The PURL is incomplete. + /// + /// # Example + /// + /// ``` + /// use std::str::FromStr; + /// + /// use phylum_purl::{GenericPurl, ParseError, PurlField}; + /// + /// assert!(matches!( + /// GenericPurl::::from_str("pkg:npm"), + /// Err(ParseError::MissingRequiredField(PurlField::Name)), + /// )); + /// ``` + #[error("Missing required field {0}")] + MissingRequiredField(PurlField), + /// The package type contains invalid characters. 
+ /// + /// # Example + /// + /// ``` + /// use std::str::FromStr; + /// + /// use phylum_purl::{GenericPurl, ParseError}; + /// + /// assert!(matches!( + /// // Because the package type was omitted, + /// // the namespace is seen to be the package type. + /// GenericPurl::::from_str("pkg:@acme/example"), + /// Err(ParseError::InvalidPackageType), + /// )); + /// ``` + #[error("Invalid package type")] + InvalidPackageType, + /// The PURL contains invalid qualifiers. + /// + /// # Example + /// + /// ``` + /// use std::str::FromStr; + /// + /// use phylum_purl::{GenericPurl, ParseError}; + /// + /// assert!(matches!( + /// GenericPurl::::from_str("pkg:npm/example?="), + /// Err(ParseError::InvalidQualifier), + /// )); + /// ``` + #[error("Invalid qualifier")] + InvalidQualifier, + /// The PURL contains illegal escaped characters. + /// + /// # Example + /// + /// ``` + /// use std::str::FromStr; + /// + /// use phylum_purl::{GenericPurl, ParseError}; + /// + /// assert!(matches!( + /// GenericPurl::::from_str("pkg:npm/%80"), + /// Err(ParseError::InvalidEscape), + /// )); + /// ``` + #[error("An escape sequence contains invalid characters")] + InvalidEscape, +} + +// This is a workaround for a typing issue. `::Err = +// Infallible`, which the compiler considers to be a normal error type. +impl From for ParseError { + fn from(_: Infallible) -> Self { + unreachable!() + } +} + +/// A specific, fixed field of a PURL. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum PurlField { + /// The type, also known as the protocol. + /// + /// For example, "npm" in `pkg:npm/my-package`. + PackageType, + /// The namespace. + /// + /// For example, "my.company" in `pkg:maven/my.company/my-package`. + Namespace, + /// The name. + /// + /// For example, "my-package" in `pkg:npm/my-package`. + Name, + /// The version. + /// + /// For example, "1.0" in `pkg:npm/my-package@1.0`. + Version, + /// The subpath. 
+ /// + /// For example, "lib" in `pkg:golang/github.com/my-company/my-package#lib`. + Subpath, +} + +impl PurlField { + /// Get a `&'static str` representing the `PurlField`. + pub const fn name(&self) -> &'static str { + match self { + PurlField::PackageType => "package type", + PurlField::Namespace => "namespace", + PurlField::Name => "name", + PurlField::Version => "version", + PurlField::Subpath => "subpath", + } + } +} + +impl From for &'static str { + fn from(value: PurlField) -> Self { + value.name() + } +} + +impl fmt::Display for PurlField { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.name()) + } +} + +impl FromStr for GenericPurl +where + T: FromStr + PurlShape, + ::Error: From<::Err>, +{ + type Err = T::Error; + + fn from_str(s: &str) -> Result { + // This mostly follows the procedure documented in the PURL spec. + // https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#how-to-parse-a-purl-string-in-its-components + let s = s.strip_prefix("pkg:").ok_or(ParseError::UnsupportedUrlScheme)?; + + // PURLs are not supposed to have any leading slashes, but the spec says that + // parsers must ignore them. + let s = s.trim_start_matches('/'); + + let mut parts = PurlParts::default(); + + // Remove subpath and qualifiers from the end now because they have higher + // precedence than the path separator. 
+ let s = match s.split_once('#') { + Some((s, subpath)) => { + parts.subpath = decode_subpath(subpath)?; + s + }, + None => s, + }; + + let s = match s.split_once('?') { + Some((s, qualifiers)) => { + decode_qualifiers(qualifiers, &mut parts)?; + s + }, + None => s, + }; + + if s.is_empty() { + return Err(ParseError::MissingRequiredField(PurlField::PackageType).into()); + } + + let (package_type, s) = + s.split_once('/').ok_or(ParseError::MissingRequiredField(PurlField::Name))?; + + if !is_valid_package_type(package_type) { + return Err(ParseError::InvalidPackageType.into()); + } + + let package_type = T::from_str(package_type)?; + + // The namespace is optional so we may not have any more slashes. + let name_and_version = match s.rsplit_once('/') { + Some((namespace, s)) => { + parts.namespace = decode_namespace(namespace)?; + s + }, + None => s, + }; + + match name_and_version.rsplit_once('@') { + Some((name, version)) => { + parts.name = decode(name)?.into(); + parts.version = decode(version)?.into(); + }, + None => { + parts.name = decode(name_and_version)?.into(); + }, + }; + + GenericPurlBuilder { package_type, parts }.build() + } +} + +fn decode_subpath(subpath: &str) -> Result { + let subpath = subpath.trim_matches('/'); + + let mut rebuilt = SmallString::new(); + for segment in subpath.split('/') { + if ["", ".", ".."].contains(&segment) { + continue; + } + let decoded = decode(segment)?; + if decoded.contains('/') || [".", ".."].contains(&&*decoded) { + return Err(ParseError::InvalidEscape); + } + if !rebuilt.is_empty() { + rebuilt.push('/'); + } + write!(rebuilt, "{}", decoded).unwrap(); + } + + Ok(rebuilt) +} + +fn decode_qualifiers(s: &str, parts: &mut PurlParts) -> Result<(), ParseError> { + for qualifier in s.split('&') { + if let Some((k, v)) = qualifier.split_once('=') { + let Entry::Vacant(entry) = parts.qualifiers.entry(k)? 
else { + return Err(ParseError::InvalidQualifier); + }; + + let v = decode(v)?; + if v.is_empty() { + continue; + } + + entry.insert(v); + } else { + return Err(ParseError::InvalidQualifier); + } + } + + Ok(()) +} + +fn decode_namespace(namespace: &str) -> Result { + let namespace = namespace.trim_matches('/'); + + let mut rebuilt = SmallString::new(); + for segment in namespace.split('/') { + if segment.is_empty() { + continue; + } + let decoded = decode(segment)?; + if decoded.contains('/') { + return Err(ParseError::InvalidEscape); + } + if !rebuilt.is_empty() { + rebuilt.push('/'); + } + write!(rebuilt, "{}", decoded).unwrap(); + } + + Ok(rebuilt) +} + +fn decode(input: &str) -> Result, ParseError> { + percent_decode_str(input).decode_utf8().map_err(|_| ParseError::InvalidEscape) +} + +#[cfg(feature = "serde")] +mod de { + use std::marker::PhantomData; + + use serde::de::{Error, Visitor}; + use serde::Deserialize; + + use super::*; + + impl<'de, T> Deserialize<'de> for GenericPurl + where + T: FromStr + PurlShape, + ::Error: fmt::Display + From<::Err>, + { + fn deserialize(deserializer: D) -> Result + where + D: ::serde::Deserializer<'de>, + { + deserializer.deserialize_str(PurlVisitor(PhantomData)) + } + } + + struct PurlVisitor(PhantomData); + + impl<'de, T> Visitor<'de> for PurlVisitor + where + T: FromStr + PurlShape, + ::Error: fmt::Display + From<::Err>, + { + type Value = GenericPurl; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("A PURL string") + } + + fn visit_str(self, v: &str) -> Result + where + E: Error, + { + GenericPurl::::from_str(v).map_err(Error::custom) + } + } +} + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use super::*; + + #[test] + fn parse_when_empty_returns_error() { + let error = GenericPurl::::from_str("").unwrap_err(); + assert!(matches!(error, ParseError::UnsupportedUrlScheme)); + } + + #[test] + fn parse_without_type_returns_error() { + let error = 
GenericPurl::::from_str("pkg:").unwrap_err(); + assert!(matches!(error, ParseError::MissingRequiredField(PurlField::PackageType))); + } + + #[test] + fn parse_without_name_returns_error() { + let error = GenericPurl::::from_str("pkg:type").unwrap_err(); + assert!(matches!(error, ParseError::MissingRequiredField(PurlField::Name))); + } + + #[test] + fn parse_when_type_invalid_returns_error() { + let error = GenericPurl::::from_str("pkg:@invalid/name").unwrap_err(); + assert!(matches!(error, ParseError::InvalidPackageType)); + } + + #[test] + fn parse_when_qualifier_invalid_returns_error() { + let error = GenericPurl::::from_str("pkg:type/name?!").unwrap_err(); + assert!(matches!(error, ParseError::InvalidQualifier)); + } + + #[test] + fn parse_when_escape_contains_illegal_chars_returns_error() { + let error = GenericPurl::::from_str("pkg:type/%80").unwrap_err(); + assert!(matches!(error, ParseError::InvalidEscape)); + } + + #[test] + fn parse_when_escaped_namespace_component_contains_path_separator_returns_error() { + let error = GenericPurl::::from_str("pkg:type/a%2fb/name").unwrap_err(); + assert!(matches!(error, ParseError::InvalidEscape)); + } + + #[test] + fn parse_when_namespace_contains_weird_components_preserves_them() { + let parsed = GenericPurl::::from_str("pkg:type/a//b/./c/../d/name").unwrap(); + assert_eq!("pkg:type/a/b/./c/../d/name", &parsed.to_string()); + } + + #[test] + fn parse_when_subpath_contains_invalid_components_skips_them() { + let parsed = GenericPurl::::from_str("pkg:type/name#/a//b/./c/../d/").unwrap(); + assert_eq!("pkg:type/name#a/b/c/d", &parsed.to_string()); + } + + #[test] + fn parse_when_escaped_subpath_component_contains_path_separator_returns_error() { + let error = GenericPurl::::from_str("pkg:type/name#a%2fb").unwrap_err(); + assert!(matches!(error, ParseError::InvalidEscape)); + } + + #[test] + fn parse_when_qualifiers_are_duplicated_returns_error() { + let error = 
GenericPurl::::from_str("pkg:type/name?a=a&a=b").unwrap_err(); + assert!(matches!(error, ParseError::InvalidQualifier)); + } + + #[test] + fn parse_when_qualifier_has_no_value_skips_it() { + let parsed = GenericPurl::::from_str("pkg:type/name?a=").unwrap(); + assert_eq!("pkg:type/name", &parsed.to_string()); + } + + #[test] + fn parse_when_qualifiers_contains_checksums_normalizes_them() { + let parsed = + GenericPurl::::from_str("pkg:type/name?checksum=hash2:12345678,HASH1:aAbBcCdD") + .unwrap(); + assert_eq!("pkg:type/name?checksum=hash1:aabbccdd,hash2:12345678", &parsed.to_string()); + } + + #[test] + fn parse_when_checksum_contains_invalid_hex_char_returns_error() { + let error = GenericPurl::::from_str("pkg:type/name?checksum=hash1:xx").unwrap_err(); + assert!(matches!(error, ParseError::InvalidQualifier)); + } + + #[test] + fn parse_when_checksum_is_malformed_returns_error() { + let error = GenericPurl::::from_str("pkg:type/name?checksum=hash1").unwrap_err(); + assert!(matches!(error, ParseError::InvalidQualifier)); + } + + #[test] + fn parse_when_checksum_is_duplicated_returns_error() { + let error = GenericPurl::::from_str("pkg:type/name?checksum=hash1:00,HASH1:11") + .unwrap_err(); + assert!(matches!(error, ParseError::InvalidQualifier)); + } + + #[test] + fn parse_parses_fields() { + let purl = + GenericPurl::::from_str("pkg:type/namespace/name@version?key=value#subpath") + .unwrap(); + assert_eq!("type", purl.package_type()); + assert_eq!(Some("namespace"), purl.namespace()); + assert_eq!("name", purl.name()); + assert_eq!(Some("version"), purl.version()); + assert_eq!(Some("value"), purl.qualifiers().get("key")); + assert_eq!(Some("subpath"), purl.subpath()); + } + + #[cfg(feature = "serde")] + #[test] + fn deserialize_deserializes_correctly() { + use serde::de::IntoDeserializer; + use serde::Deserialize; + + let deserialized = GenericPurl::::deserialize(IntoDeserializer::< + serde::de::value::Error, + >::into_deserializer( + "pkg:type/name".to_owned() + 
)) + .unwrap(); + assert_eq!(GenericPurl::::from_str("pkg:type/name").unwrap(), deserialized,); + } +} diff --git a/purl/src/qualifiers.rs b/purl/src/qualifiers.rs new file mode 100644 index 0000000..6ab8fc4 --- /dev/null +++ b/purl/src/qualifiers.rs @@ -0,0 +1,1023 @@ +//! Specialized key-value collection for PURL qualifiers. + +use std::cmp::Ordering; +use std::marker::PhantomData; +use std::ops::{Deref, Index, IndexMut}; +use std::{mem, slice}; + +use self::well_known::KnownQualifierKey; +use crate::{ParseError, SmallString}; + +pub mod well_known; + +/// A list of qualifiers. +/// +/// Internally, qualifiers are stored as a sorted list of key-value pairs, ready +/// to be joined into a properly formatted PURL string. +/// +/// The keys are always valid qualifier names in their canonical format +/// (lowercase). Uppercase keys are automatically converted to lowercase. +#[derive(Clone, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord)] +pub struct Qualifiers { + qualifiers: Vec<(QualifierKey, SmallString)>, +} + +impl Qualifiers { + /// Try to construct a [`Qualifiers`] list from key-value pairs. + /// + /// If any of the keys cannot be converted to a qualifier name, + /// [`ParseError::InvalidQualifier`] will be returned. + /// + /// If the same key is repeated, [`ParseError::InvalidQualifier`] will be + /// returned. + pub fn try_from_iter(items: I) -> Result + where + I: IntoIterator, + K: AsRef, + V: AsRef, + SmallString: From + From, + { + let items = items.into_iter(); + let mut this = Qualifiers::with_capacity(items.size_hint().0); + for (key, value) in items { + match this.entry(key)? { + Entry::Occupied(_) => return Err(ParseError::InvalidQualifier), + Entry::Vacant(entry) => { + entry.insert(value); + }, + } + } + Ok(this) + } + + /// Create an empty [`Qualifiers`] list with space for `capacity` elements. 
+ pub fn with_capacity(capacity: usize) -> Self { + let mut this = Self::default(); + this.reserve_exact(capacity); + this + } + + /// Get the total capacity of the list. + pub fn capacity(&self) -> usize { + self.qualifiers.capacity() + } + + /// Iterate over the elements of the list. + pub fn iter(&self) -> Iter { + Iter(self.qualifiers.iter()) + } + + /// Iterate over the elements of the list. + /// + /// Only the value may be mutated. + pub fn iter_mut(&mut self) -> IterMut { + IterMut(self.qualifiers.iter_mut()) + } + + /// Get the length of the list. + pub fn len(&self) -> usize { + self.qualifiers.len() + } + + /// Check if the list is empty. + pub fn is_empty(&self) -> bool { + self.qualifiers.is_empty() + } + + /// Remove all elements from the list. + pub fn clear(&mut self) { + self.qualifiers.clear() + } + + /// Ensure the list has capacity for at least `additional` more elements. + /// + /// Compared to [`Self::reserve_exact`], if the capacity needs to be + /// increased, this function may increase the capacity by more than the + /// requested amount in order to have room for additional elements that + /// may come later. + pub fn reserve(&mut self, additional: usize) { + self.qualifiers.reserve(additional) + } + + /// Ensure the list has capacity for at least `additional` more elements. + /// + /// Compared to [`Self::reserve`], if the capacity needs to be increased, + /// this function increases the capacity by the minimum number of + /// elements to reach the desired capacity. + pub fn reserve_exact(&mut self, additional: usize) { + self.qualifiers.reserve_exact(additional) + } + + /// Get a qualifier by key. + /// + /// If the qualifier is not in the list, `None` is returned. + pub fn get(&self, key: K) -> Option<&str> + where + K: AsRef, + { + self.get_index(key).map(|i| self.qualifiers[i].1.as_str()) + } + + /// Get a typed qualifier. + /// + /// If the qualifier is not in the list, `None` is returned. 
+ pub fn get_typed<'a, Q>(&'a self) -> Option + where + Q: From<&'a str> + KnownQualifierKey, + { + self.get(Q::KEY).map(Q::from) + } + + /// Try to get a typed qualifier. + /// + /// If the qualifier is not in the list, `Ok(None)` is returned. + pub fn try_get_typed<'a, Q>(&'a self) -> Result, Q::Error> + where + Q: TryFrom<&'a str> + KnownQualifierKey, + { + self.get(Q::KEY).map(Q::try_from).transpose() + } + + fn get_index(&self, key: K) -> Option + where + K: AsRef, + { + // If it's not valid, it has no index. + let key = check_qualifier_key(key).ok()?; + self.search(&key).ok() + } + + fn search(&self, key: &MixedQualifierKey) -> Result + where + K: AsRef, + { + self.qualifiers.binary_search_by(|(qk, _qv)| qk.partial_cmp(&key).unwrap()) + } + + /// Get an [`Entry`] for a qualifier. + /// + /// This allows obtaining the current value and modifying it or inserting a + /// new value without needing to search for the qualifier multiple + /// times. + pub fn entry(&mut self, key: K) -> Result, ParseError> + where + K: AsRef, + { + let key = check_qualifier_key(key)?; + Ok(match self.search(&key) { + Ok(index) => Entry::Occupied(OccupiedEntry { + qualifiers: &mut self.qualifiers, + index, + key: PhantomData, + }), + Err(index) => { + Entry::Vacant(VacantEntry { qualifiers: &mut self.qualifiers, index, key }) + }, + }) + } + + /// Get a qualifier. + pub fn get_mut(&mut self, key: K) -> Option<&mut SmallString> + where + K: AsRef, + { + match self.entry(key) { + Ok(Entry::Occupied(o)) => Some(o.into_mut()), + _ => None, + } + } + + /// Check whether a qualifier with the given name exists. + pub fn contains_key(&self, key: K) -> bool + where + K: AsRef, + { + self.get_index(key).is_some() + } + + /// Check whether a qualifier with the given name exists. + pub fn contains_typed(&self) -> bool + where + Q: KnownQualifierKey, + { + self.contains_key(Q::KEY) + } + + /// Set a qualifier. 
+ pub fn insert(&mut self, key: K, v: V) -> Result<&mut SmallString, ParseError> + where + K: AsRef, + SmallString: From + From, + { + let key = check_qualifier_key(key)?; + let index = match self.search(&key) { + Ok(i) => { + self.qualifiers[i].1 = SmallString::from(v); + i + }, + Err(i) => { + self.qualifiers.insert(i, (key.into_key(), SmallString::from(v))); + i + }, + }; + Ok(&mut self.qualifiers[index].1) + } + + /// Set a typed qualifier. + /// + /// # Panics + /// + /// This method panics if the [`KnownQualifierKey::KEY`] is not a valid + /// qualifier key. + pub fn insert_typed(&mut self, value: Q) + where + Q: KnownQualifierKey, + SmallString: From, + { + // Rust 1.68.1 gets confused without this type annotation. + self.insert::<&'static str, _>(Q::KEY, value).unwrap(); + } + + /// Set a typed qualifier. + /// + /// # Panics + /// + /// This method panics if the [`KnownQualifierKey::KEY`] is not a valid + /// qualifier key. + pub fn try_insert_typed( + &mut self, + value: Q, + ) -> Result<(), >::Error> + where + Q: KnownQualifierKey, + SmallString: TryFrom, + { + let value = SmallString::try_from(value)?; + self.insert(Q::KEY, value).unwrap(); + Ok(()) + } + + /// Unset a qualifier. + pub fn remove(&mut self, key: S) -> Option + where + S: AsRef, + { + if let Some(index) = self.get_index(key) { + Some(self.qualifiers.remove(index).1) + } else { + None + } + } + + /// Unset a typed qualifier. + pub fn remove_typed(&mut self) + where + Q: KnownQualifierKey, + { + self.remove(Q::KEY); + } + + /// Retain only qualifiers that match the given predicate. + pub fn retain(&mut self, mut f: F) + where + F: FnMut(&QualifierKey, &str) -> bool, + { + self.qualifiers.retain(move |q| f(&q.0, &q.1)); + } + + /// Retain only qualifiers that match the given predicate. 
+ pub fn retain_mut(&mut self, mut f: F) + where + F: FnMut(&QualifierKey, &mut SmallString) -> bool, + { + self.qualifiers.retain_mut(move |q| f(&q.0, &mut q.1)); + } +} + +impl<'a> IntoIterator for &'a Qualifiers { + type IntoIter = Iter<'a>; + type Item = (&'a QualifierKey, &'a str); + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a> IntoIterator for &'a mut Qualifiers { + type IntoIter = IterMut<'a>; + type Item = (&'a QualifierKey, &'a mut SmallString); + + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +/// A case-insensitive qualifier name. +/// +/// Comparisons between this type and other types are case insensitive. +#[derive(Clone, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord)] +pub struct QualifierKey(SmallString); + +impl PartialEq for QualifierKey +where + S: AsRef + ?Sized, +{ + fn eq(&self, other: &S) -> bool { + self.partial_cmp(other).map(|o| o.is_eq()).unwrap_or_default() + } +} + +impl PartialOrd for QualifierKey +where + S: AsRef + ?Sized, +{ + fn partial_cmp(&self, other: &S) -> Option { + let other = other.as_ref().chars().flat_map(|c| c.to_lowercase()); + Some(self.0.chars().cmp(other)) + } +} + +impl Deref for QualifierKey { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl QualifierKey { + /// Get a reference to the lower case string. + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +/// A representation of a qualifier that may or may not exist in the list. +pub enum Entry<'a, K> { + Occupied(OccupiedEntry<'a, K>), + Vacant(VacantEntry<'a, K>), +} + +impl<'a, K> Entry<'a, K> { + /// If the qualifier does not exist, create it by inserting `default`. + /// + /// Returns a mutable reference to the value of the qualifier. 
+ pub fn or_insert(self, default: V) -> &'a mut SmallString + where + SmallString: From + From, + { + match self { + Entry::Occupied(o) => o.into_mut(), + Entry::Vacant(v) => v.insert(default), + } + } + + /// If the qualifier does not exist, create it by inserting `default()`. + /// + /// Returns a mutable reference to the value of the qualifier. + pub fn or_insert_with(self, default: F) -> &'a mut SmallString + where + F: FnOnce() -> V, + SmallString: From + From, + { + match self { + Entry::Occupied(o) => o.into_mut(), + Entry::Vacant(v) => v.insert(default()), + } + } + + /// If the qualifier exists, modify it by calling `f()`. + pub fn and_modify(mut self, f: F) -> Self + where + F: FnOnce(&mut SmallString), + { + match &mut self { + Entry::Occupied(ref mut o) => f(o.get_mut()), + Entry::Vacant(_) => {}, + } + self + } +} + +/// A representation of a qualifier that exists in the list. +pub struct OccupiedEntry<'a, K> { + qualifiers: &'a mut Vec<(QualifierKey, SmallString)>, + index: usize, + key: PhantomData, +} + +impl<'a, K> OccupiedEntry<'a, K> { + /// Remove the qualifier from the list and return it as a key-value pair. + pub fn remove_entry(self) -> (SmallString, SmallString) { + let (k, v) = self.qualifiers.remove(self.index); + (k.0, v) + } + + /// Get the value of the qualifier. + pub fn get(&self) -> &str { + &self.qualifiers[self.index].1 + } + + /// Get the value of the qualifier. + pub fn get_mut(&mut self) -> &mut SmallString { + &mut self.qualifiers[self.index].1 + } + + /// Convert this entry into a mutable reference to the qualifier's value. + /// + /// This is similar to [`Self::get_mut()`] but has different lifetimes. + pub fn into_mut(self) -> &'a mut SmallString { + &mut self.qualifiers[self.index].1 + } + + /// Overwrite the value of the qualifier. + /// + /// The previous value is returned. 
+ pub fn insert(&mut self, value: V) -> SmallString + where + SmallString: From, + { + let mut v = SmallString::from(value); + mem::swap(&mut v, &mut self.qualifiers[self.index].1); + v + } + + /// Remove the qualifier from the list and return its value. + pub fn remove(self) -> SmallString { + self.qualifiers.remove(self.index).1 + } +} + +/// A representation of a qualifier that does not exist in the list. +pub struct VacantEntry<'a, K> { + qualifiers: &'a mut Vec<(QualifierKey, SmallString)>, + index: usize, + key: MixedQualifierKey, +} + +impl<'a, K> VacantEntry<'a, K> { + /// Insert the qualifier with `value`. + pub fn insert(self, value: V) -> &'a mut SmallString + where + SmallString: From + From, + { + self.qualifiers.insert(self.index, (self.key.into_key(), SmallString::from(value))); + &mut self.qualifiers[self.index].1 + } +} + +/// An iterator over the qualifier key value pairs. +#[must_use] +pub struct Iter<'a>(slice::Iter<'a, (QualifierKey, SmallString)>); + +impl<'a> Iterator for Iter<'a> { + type Item = (&'a QualifierKey, &'a str); + + fn next(&mut self) -> Option { + let (k, v) = self.0.next()?; + Some((k, v.as_str())) + } + + fn size_hint(&self) -> (usize, Option) { + (self.0.len(), Some(self.0.len())) + } +} + +impl<'a> ExactSizeIterator for Iter<'a> {} + +impl<'a> DoubleEndedIterator for Iter<'a> { + fn next_back(&mut self) -> Option { + let (k, v) = self.0.next_back()?; + Some((k, v.as_str())) + } +} + +fn is_valid_qualifier_name(k: &str) -> bool { + // https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst#rules-for-each-purl-component + const ALLOWED_SPECIAL_CHARS: &[char] = &['.', '-', '_']; + !k.is_empty() + && k.chars().all(|c| c.is_ascii_alphanumeric() || ALLOWED_SPECIAL_CHARS.contains(&c)) +} + +/// An iterator over the qualifier key value pairs. 
+#[must_use] +pub struct IterMut<'a>(slice::IterMut<'a, (QualifierKey, SmallString)>); + +impl<'a> Iterator for IterMut<'a> { + type Item = (&'a QualifierKey, &'a mut SmallString); + + fn next(&mut self) -> Option { + let (k, v) = self.0.next()?; + Some((k, v)) + } + + fn size_hint(&self) -> (usize, Option) { + (self.0.len(), Some(self.0.len())) + } +} + +impl<'a> ExactSizeIterator for IterMut<'a> {} + +impl<'a> DoubleEndedIterator for IterMut<'a> { + fn next_back(&mut self) -> Option { + let (k, v) = self.0.next_back()?; + Some((k, v)) + } +} + +/// A qualifier name wrapper. +/// +/// Qualifier names must contain only ascii characters and cannot contain +/// certain characters. They are compared case-insensitively and are canonically +/// lowercase. This wrapper allows us to defer and sometimes avoid copying a key +/// to create a lowercase version of it. +#[must_use] +enum MixedQualifierKey { + /// A lowercase value that can be used as-is. + Lower(S), + /// A mixed-case value. + Mixed(S), +} + +impl MixedQualifierKey { + /// Convert the `QualifierName` into a `SmallString`. + /// + /// If `s` is a `SmallString` or `String` it can be consumed without + /// copying. 
+ #[must_use] + fn into_key(self) -> QualifierKey + where + SmallString: From, + { + QualifierKey(match self { + MixedQualifierKey::Lower(s) => SmallString::from(s), + MixedQualifierKey::Mixed(s) => { + let mut s = SmallString::from(s); + s.make_ascii_lowercase(); + s + }, + }) + } +} + +impl AsRef for MixedQualifierKey +where + S: AsRef, +{ + fn as_ref(&self) -> &str { + match self { + MixedQualifierKey::Lower(s) => s.as_ref(), + MixedQualifierKey::Mixed(s) => s.as_ref(), + } + } +} + +fn check_qualifier_key(k: S) -> Result, ParseError> +where + S: AsRef, +{ + let ks = k.as_ref(); + if !is_valid_qualifier_name(ks) { + return Err(ParseError::InvalidQualifier); + } + if ks.chars().all(|c| c.is_ascii_lowercase()) { + Ok(MixedQualifierKey::Lower(k)) + } else { + Ok(MixedQualifierKey::Mixed(k)) + } +} + +impl Index for Qualifiers +where + K: AsRef, +{ + type Output = SmallString; + + fn index(&self, index: K) -> &Self::Output { + let index = index.as_ref(); + let Some(value) = self.get_index(index).map(|i| &self.qualifiers[i].1) else { + panic!("Qualifier {index:?} not found"); + }; + value + } +} + +impl IndexMut for Qualifiers +where + K: AsRef, +{ + fn index_mut(&mut self, index: K) -> &mut Self::Output { + let index = index.as_ref(); + let Some(value) = self.get_index(index).map(|i| &mut self.qualifiers[i].1) else { + panic!("Qualifier {index:?} not found"); + }; + value + } +} + +#[cfg(test)] +mod tests { + use super::well_known::Checksum; + use super::*; + + fn a_b_c() -> Qualifiers { + Qualifiers::try_from_iter([("a", "A"), ("b", "B"), ("c", "C")]) + .expect("Could not create test qualifiers") + } + + #[test] + fn try_from_iter_with_duplicates_returns_error() { + let error = Qualifiers::try_from_iter([("a", "A"), ("a", "A")]).unwrap_err(); + assert!(matches!(error, ParseError::InvalidQualifier)); + } + + #[test] + fn iter_iterates_qualifiers() { + let qualifiers = a_b_c(); + let mut iter = qualifiers.iter(); + assert_eq!(3, iter.len()); + assert_eq!(Some(("a", 
"A")), iter.next().map(|(k, v)| (k.as_str(), v))); + assert_eq!(2, iter.len()); + assert_eq!(Some(("c", "C")), iter.next_back().map(|(k, v)| (k.as_str(), v))); + assert_eq!(1, iter.len()); + assert_eq!(Some(("b", "B")), iter.next().map(|(k, v)| (k.as_str(), v))); + assert_eq!(0, iter.len()); + } + + #[test] + fn iter_mut_iterates_qualifiers() { + let mut qualifiers = a_b_c(); + + let mut iter = qualifiers.iter_mut(); + assert_eq!(3, iter.len()); + assert_eq!(Some(("a", "A")), iter.next().map(|(k, v)| (k.as_str(), v.as_str()))); + assert_eq!(2, iter.len()); + assert_eq!(Some(("c", "C")), iter.next_back().map(|(k, v)| (k.as_str(), v.as_str()))); + assert_eq!(1, iter.len()); + assert_eq!(Some(("b", "B")), iter.next().map(|(k, v)| (k.as_str(), v.as_str()))); + assert_eq!(0, iter.len()); + } + + #[test] + fn insert_inserts_in_order() { + let mut qualifiers = Qualifiers::default(); + qualifiers.insert("b", "B").expect("Could not set qualifier b"); + qualifiers.insert("c", "C").expect("Could not set qualifier c"); + qualifiers.insert("a", "A").expect("Could not set qualifier a"); + + assert_eq!( + vec![("a", "A"), ("b", "B"), ("c", "C")], + qualifiers.iter().map(|(k, v)| (k.as_str(), v)).collect::>(), + ); + } + + #[test] + fn insert_converts_to_lower() { + let mut qualifiers = Qualifiers::default(); + qualifiers.insert("A", "A").expect("Could not set qualifier a"); + + assert_eq!( + vec![("a", "A")], + qualifiers.iter().map(|(k, v)| (k.as_str(), v)).collect::>() + ); + } + + #[test] + fn insert_with_empty_key_returns_error() { + let mut qualifiers = Qualifiers::default(); + let error = qualifiers + .insert("", "A") + .expect_err("Should not have been able to set qualifier with empty key"); + + assert!(matches!(error, ParseError::InvalidQualifier), "Got unexpected error {}", error,); + } + + #[test] + fn insert_with_invalid_key_returns_error() { + let mut qualifiers = Qualifiers::default(); + let error = qualifiers + .insert("!", "A") + .expect_err("Should not have been 
able to set qualifier with invalid key"); + + assert!(matches!(error, ParseError::InvalidQualifier), "Got unexpected error {}", error,); + } + + #[test] + fn remove_with_empty_key_does_nothing() { + let mut qualifiers = Qualifiers::default(); + assert_eq!(None, qualifiers.remove("")); + } + + #[test] + fn remove_with_invalid_key_does_nothing() { + let mut qualifiers = Qualifiers::default(); + assert_eq!(None, qualifiers.remove("!")); + } + + #[test] + fn remove_with_unset_qualifier_does_nothing() { + let mut qualifiers = Qualifiers::default(); + assert_eq!(None, qualifiers.remove("a")); + } + + #[test] + fn remove_unsets_qualifier() { + let mut qualifiers = a_b_c(); + + assert_eq!(Some("B"), qualifiers.remove("b").as_deref()); + + assert_eq!( + vec![("a", "A"), ("c", "C")], + qualifiers.iter().map(|(k, v)| (k.as_str(), v)).collect::>(), + ); + } + + #[test] + fn remove_with_uppercase_key_unsets_qualifier() { + let mut qualifiers = a_b_c(); + + assert_eq!(Some("B"), qualifiers.remove("B").as_deref()); + + assert_eq!( + vec![("a", "A"), ("c", "C")], + qualifiers.iter().map(|(k, v)| (k.as_str(), v)).collect::>(), + ); + } + + #[test] + fn get_returns_value() { + let qualifiers = a_b_c(); + + assert_eq!(Some("B"), qualifiers.get("b")); + } + + #[test] + fn get_with_uppercase_key_returns_value() { + let qualifiers = a_b_c(); + + assert_eq!(Some("B"), qualifiers.get("B")); + } + + #[test] + fn get_with_missing_key_returns_none() { + let qualifiers = a_b_c(); + + assert_eq!(None, qualifiers.get("d")); + } + + #[test] + fn get_with_invalid_key_returns_none() { + let qualifiers = a_b_c(); + + assert_eq!(None, qualifiers.get("!")); + } + + #[test] + fn get_mut_returns_mutable_value() { + let mut qualifiers = a_b_c(); + + let value = qualifiers.get_mut("b").expect("Value should be returned"); + assert_eq!("B", &*value); + *value = "b".into(); + assert_eq!(Some("b"), qualifiers.get("b")); + } + + #[test] + fn get_mut_with_uppercase_key_returns_value() { + let mut qualifiers = 
a_b_c(); + + assert_eq!(Some("B"), qualifiers.get_mut("B").map(|v| v.as_str())); + } + + #[test] + fn get_mut_for_missing_key_returns_none() { + let mut qualifiers = a_b_c(); + + assert_eq!(None, qualifiers.get_mut("d")); + } + + #[test] + fn get_mut_for_invalid_key_returns_none() { + let mut qualifiers = a_b_c(); + + assert_eq!(None, qualifiers.get_mut("!")); + } + + #[test] + fn contains_key_when_key_exists_returns_true() { + let qualifiers = a_b_c(); + assert!(qualifiers.contains_key("a")); + } + + #[test] + fn contains_key_when_lowercased_key_exists_returns_true() { + let qualifiers = a_b_c(); + assert!(qualifiers.contains_key("A")); + } + + #[test] + fn contains_key_when_key_does_not_exist_returns_false() { + let qualifiers = a_b_c(); + assert!(!qualifiers.contains_key("aa")); + } + + #[test] + fn contains_key_when_key_invalid_returns_false() { + let qualifiers = a_b_c(); + assert!(!qualifiers.contains_key("!")); + } + + #[test] + fn retain_removes_other_qualifiers() { + let mut qualifiers = a_b_c(); + qualifiers.retain(|k, _v| k == "b"); + assert_eq!( + vec![("b", "B")], + qualifiers.iter().map(|(k, v)| (k.as_str(), v)).collect::>(), + ); + } + + #[test] + fn retain_is_case_insensitive() { + let mut qualifiers = a_b_c(); + qualifiers.retain(|k, _v| k == "B"); + assert_eq!( + vec![("b", "B")], + qualifiers.iter().map(|(k, v)| (k.as_str(), v)).collect::>(), + ); + } + + #[test] + fn retain_mut_can_modify() { + let mut qualifiers = a_b_c(); + qualifiers.retain_mut(|k, v| { + v.make_ascii_lowercase(); + k == "b" + }); + assert_eq!( + vec![("b", "b")], + qualifiers.iter().map(|(k, v)| (k.as_str(), v)).collect::>(), + ); + } + + #[test] + fn entry_or_insert_when_exists_returns_existing() { + let mut qualifiers = a_b_c(); + let value = qualifiers.entry("A").unwrap().or_insert("aa"); + assert_eq!("A", value.as_str()); + } + + #[test] + fn entry_or_insert_when_does_not_exist_returns_inserted() { + let mut qualifiers = a_b_c(); + let value = 
qualifiers.entry("AA").unwrap().or_insert("aa"); + assert_eq!("aa", value.as_str()); + assert_eq!("aa", qualifiers["aa"]); + } + + #[test] + fn entry_or_insert_with_when_exists_does_not_call_function() { + let mut qualifiers = a_b_c(); + let value = qualifiers.entry("A").unwrap().or_insert_with::<_, &str>(|| { + panic!("Should not be called"); + }); + assert_eq!("A", value.as_str()); + } + + #[test] + fn entry_or_insert_with_when_does_not_exist_inserts() { + let mut qualifiers = a_b_c(); + let value = qualifiers.entry("AA").unwrap().or_insert_with(|| "aa"); + assert_eq!("aa", value.as_str()); + assert_eq!("aa", qualifiers["aa"]); + } + + #[test] + fn entry_or_modify_when_exists_modifies() { + let mut qualifiers = a_b_c(); + qualifiers.entry("A").unwrap().and_modify(|v| v.make_ascii_lowercase()); + assert_eq!("a", qualifiers["A"]); + } + + #[test] + fn entry_or_modify_when_does_not_exist_does_not_call_function() { + let mut qualifiers = a_b_c(); + qualifiers.entry("aa").unwrap().and_modify(|_v| { + panic!("Should not be called"); + }); + } + + #[test] + fn occupied_entry_remove_removes() { + let mut qualifiers = a_b_c(); + let Entry::Occupied(entry) = qualifiers.entry("a").unwrap() else { + panic!("Expected entry to exist before test"); + }; + let value = entry.remove(); + assert_eq!("A", value.as_str()); + assert!(!qualifiers.contains_key("a")); + } + + #[test] + fn occupied_entry_insert_overwrites() { + let mut qualifiers = a_b_c(); + let Entry::Occupied(mut entry) = qualifiers.entry("a").unwrap() else { + panic!("Expected entry to exist before test"); + }; + let old_value = entry.insert("AA"); + assert_eq!("A", old_value.as_str()); + assert_eq!("AA", entry.get()); + assert_eq!("AA", qualifiers["a"]); + } + + #[test] + fn occupied_entry_remove_entry_removes() { + let mut qualifiers = a_b_c(); + let Entry::Occupied(entry) = qualifiers.entry("a").unwrap() else { + panic!("Expected entry to exist before test"); + }; + let (key, value) = entry.remove_entry(); + 
assert_eq!(("a", "A"), (key.as_str(), value.as_str())); + assert!(!qualifiers.contains_key("a")); + } + + #[test] + #[should_panic] + fn index_does_not_exist_panics() { + let _value = &Qualifiers::default()["a"]; + } + + #[test] + #[should_panic] + fn index_mut_does_not_exist_panics() { + let _value = &mut Qualifiers::default()["a"]; + } + + #[test] + fn index_mut_can_set() { + let mut qualifiers = a_b_c(); + let value = &mut qualifiers["a"]; + *value = "new".into(); + assert_eq!("new", qualifiers["a"].as_str()); + } + + #[test] + fn len_returns_length() { + assert_eq!(0, Qualifiers::default().len()); + assert_eq!(1, Qualifiers::try_from_iter([("a", "a")]).unwrap().len()); + assert_eq!(2, Qualifiers::try_from_iter([("a", "a"), ("b", "b")]).unwrap().len(),); + } + + #[test] + fn clear_removes_all_entries() { + let mut qualifiers = a_b_c(); + qualifiers.clear(); + assert_eq!(0, qualifiers.len()); + assert!(qualifiers.iter().next().is_none()); + assert!(qualifiers.get("a").is_none()); + } + + #[test] + #[should_panic] + fn insert_typed_when_key_is_invalid_panics() { + struct Invalid; + + impl KnownQualifierKey for Invalid { + const KEY: &'static str = ""; + } + + impl From for SmallString { + fn from(_: Invalid) -> Self { + SmallString::from("invalid") + } + } + + let mut qualifiers = Qualifiers::default(); + qualifiers.insert_typed(Invalid); + } + + #[test] + fn try_insert_typed_when_successful_inserts_and_returns_ok() { + let mut checksums = Checksum::default(); + checksums.insert_raw("hash1", "00".to_owned()); + let mut qualifiers = Qualifiers::default(); + + qualifiers.try_insert_typed(checksums).unwrap(); + + assert_eq!(1, qualifiers.len()); + assert_eq!(Some("hash1:00"), qualifiers.get(Checksum::KEY)); + } + + #[test] + fn try_insert_typed_when_unsuccessful_does_insert_and_returns_error() { + let mut checksums = Checksum::default(); + checksums.insert_raw("hash1", "x".to_owned()); + let mut qualifiers = Qualifiers::try_from_iter([(Checksum::KEY, 
"hash1:00")]).unwrap(); + + let error = qualifiers.try_insert_typed(checksums).unwrap_err(); + + assert!(matches!(error, ParseError::InvalidQualifier)); + assert_eq!(1, qualifiers.len()); + assert_eq!(Some("hash1:00"), qualifiers.get(Checksum::KEY)); + } +} diff --git a/purl/src/qualifiers/well_known.rs b/purl/src/qualifiers/well_known.rs new file mode 100644 index 0000000..71fc363 --- /dev/null +++ b/purl/src/qualifiers/well_known.rs @@ -0,0 +1,426 @@ +//! Well-known qualifiers for use with [`super::Qualifiers::get_typed`] and +//! [`super::Qualifiers::insert_typed`]. + +use std::borrow::Cow; +use std::collections::HashMap; + +use hex::{FromHex, ToHex}; + +use crate::{copy_as_lowercase, ParseError, SmallString}; + +pub mod maven; + +/// A type that has an associated qualifier key. +pub trait KnownQualifierKey { + /// The key of the qualifier. + /// + /// This must be a valid qualifier key or attempting to set the qualifier + /// will panic. + const KEY: &'static str; +} + +macro_rules! str_ref_qualifier { + ($type_name:ident, $qualifier_key:literal, $human_name:literal) => { + #[doc = concat!("A ", $human_name, " qualifier.")] + pub struct $type_name<'a>(&'a str); + + impl<'a> AsRef for $type_name<'a> { + fn as_ref(&self) -> &str { + self.0 + } + } + + impl<'a> From<$type_name<'a>> for &'a str { + fn from(value: $type_name<'a>) -> Self { + value.0 + } + } + + impl<'a> From<&'a str> for $type_name<'a> { + fn from(value: &'a str) -> Self { + $type_name(value) + } + } + + impl<'a> From<$type_name<'a>> for $crate::SmallString { + fn from(value: $type_name<'a>) -> Self { + Self::from(<&'a str>::from(value)) + } + } + + impl<'a> ::std::ops::Deref for $type_name<'a> { + type Target = str; + + fn deref(&self) -> &str { + self.0 + } + } + + impl<'a> $crate::qualifiers::well_known::KnownQualifierKey for $type_name<'a> { + const KEY: &'static str = $qualifier_key; + } + }; +} +// Allow child modules to use this macro. 
+use str_ref_qualifier; + +str_ref_qualifier!(RepositoryUrl, "repository_url", "repository URL"); +str_ref_qualifier!(DownloadUrl, "download_url", "download URL"); +str_ref_qualifier!(VcsUrl, "vcs_url", "VCS URL"); +str_ref_qualifier!(FileName, "file_name", "file name"); + +/// A checksum qualifier. +/// +/// # Example +/// +/// ``` +/// use phylum_purl::qualifiers::well_known::Checksum; +/// use phylum_purl::GenericPurl; +/// +/// let sha256 = +/// hex::decode("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855").unwrap(); +/// let mut checksums = Checksum::default(); +/// checksums.insert("sha256", sha256); +/// let purl = GenericPurl::::builder("type".to_owned(), "name") +/// .try_with_typed_qualifier(Some(checksums)) +/// .unwrap() +/// .build() +/// .unwrap(); +/// assert_eq!( +/// "pkg:type/name?checksum=sha256:\ +/// e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", +/// purl.to_string(), +/// ); +/// ``` +#[derive(Clone, Debug, Default)] +pub struct Checksum<'a> { + algorithms: HashMap>, +} + +impl<'a> KnownQualifierKey for Checksum<'a> { + const KEY: &'static str = "checksum"; +} + +impl<'a> TryFrom<&'a str> for Checksum<'a> { + type Error = ParseError; + + fn try_from(value: &'a str) -> Result { + let mut algorithms = + HashMap::with_capacity(value.chars().filter(|c| *c == ',').count() + 1); + for hash in value.split(',') { + let Some((algorithm, bytes)) = hash.rsplit_once(':') else { + return Err(ParseError::InvalidQualifier); + }; + + let algorithm = copy_as_lowercase(algorithm); + + if algorithms.insert(algorithm, Cow::Borrowed(bytes)).is_some() { + // Duplicate algorithm. 
+ return Err(ParseError::InvalidQualifier); + } + } + + Ok(Self { algorithms }) + } +} + +impl<'a> TryFrom> for SmallString { + type Error = ParseError; + + fn try_from(value: Checksum<'a>) -> Result { + let mut algorithms: Vec<_> = value.algorithms.into_iter().collect(); + algorithms.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + + let mut v = String::with_capacity( + algorithms.iter().map(|(k, v)| k.len() + 1 + v.len()).sum::() + algorithms.len() + - 1, + ); + for (algorithm, bytes) in algorithms { + if bytes.chars().any(|b| !b.is_ascii_hexdigit()) || bytes.len() % 2 != 0 { + return Err(ParseError::InvalidQualifier); + } + + if !v.is_empty() { + v.push(','); + } + v.push_str(&algorithm); + v.push(':'); + v.extend(bytes.chars().map(|c| c.to_ascii_lowercase())); + } + Ok(SmallString::from(v)) + } +} + +impl<'a> Checksum<'a> { + /// Get a reference to the hex bytes of a hash. + /// + /// The hash may not be valid hex bytes. + /// + /// To decode the value into bytes, use [`Self::get`]. + pub fn get_raw<'b>(&'b self, algorithm: &str) -> Option<&'b str> { + self.algorithms.get(algorithm).map(|v| &**v) + } + + /// Get the value of a hash as type `T`. + /// + /// To get the hex bytes, use [`Self::get_raw`]. + pub fn get(&self, algorithm: &str) -> Result, T::Error> + where + T: FromHex, + { + self.get_raw(algorithm).map(T::from_hex).transpose() + } + + /// Get an iterator over all the algorithm names. + pub fn algorithms(&self) -> impl Iterator { + self.algorithms.keys().map(|k| &**k) + } + + /// Set the hex bytes of a hash. + /// + /// The hex bytes are not validated. + /// + /// If the value is not already hex-encoded, use `[Self::insert]`. + pub fn insert_raw(&mut self, algorithm: &str, value: String) { + if let Some(v) = self.algorithms.get_mut(algorithm) { + *v = Cow::Owned(value); + } else { + self.algorithms.insert(copy_as_lowercase(algorithm), Cow::Owned(value)); + } + } + + /// Set the value of a hash. + /// + /// The value will be hex encoded. 
If the value is already a hex string, use + /// [`Self::insert_raw`]. + pub fn insert(&mut self, algorithm: &str, value: T) + where + T: ToHex, + { + self.insert_raw(algorithm, value.encode_hex()) + } + + /// Remove a hash. + pub fn remove(&mut self, algorithm: &str) { + self.algorithms.remove(algorithm); + } +} + +#[cfg(test)] +mod tests { + use std::borrow::Cow; + + use super::*; + use crate::{GenericPurl, Qualifiers}; + + #[test] + fn can_get_repository_url() { + const URL: &str = "docker.io/library/debian"; + let purl = GenericPurl::builder(Cow::Borrowed("oci"), "debian") + .with_qualifier("repository_url", URL) + .unwrap() + .build() + .unwrap(); + assert_eq!(Some(URL), purl.qualifiers().get_typed::().as_deref()) + } + + #[test] + fn can_set_repository_url() { + const URL: &str = "ghcr.io/debian"; + let purl = GenericPurl::builder(Cow::Borrowed("oci"), "debian") + .with_typed_qualifier(Some(RepositoryUrl::from(URL))) + .build() + .unwrap(); + assert_eq!(Some(URL), purl.qualifiers().get("repository_url")) + } + + #[test] + fn can_remove_repository_url() { + let mut qualifiers = + Qualifiers::try_from_iter([("repository_url", "gcr.io/distroless")]).unwrap(); + assert!(qualifiers.contains_typed::()); + qualifiers.remove_typed::(); + assert!(!qualifiers.contains_typed::()); + } + + mod checksum { + use std::fmt::Write; + + use hex::FromHexError; + use maplit::hashmap; + + use super::*; + + #[test] + fn get_raw_gets_when_set_gets_whatever_value() { + let checksums = Checksum { + algorithms: hashmap! { + SmallString::from("hash1") => Cow::Borrowed("x"), + }, + }; + assert_eq!(Some("x"), checksums.get_raw("hash1")); + } + + #[test] + fn get_raw_gets_when_unset_returns_none() { + let checksums = Checksum::default(); + assert_eq!(None, checksums.get_raw("hash1")); + } + + #[test] + fn get_when_set_and_valid_returns_value() { + let checksums = Checksum { + algorithms: hashmap! 
{ + SmallString::from("hash1") => Cow::Borrowed("000102"), + }, + }; + assert_eq!( + Some([0u8, 1, 2].as_slice()), + checksums.get::>("hash1").unwrap().as_deref(), + ); + } + + #[test] + fn get_when_set_and_invalid_returns_error() { + let checksums = Checksum { + algorithms: hashmap! { + SmallString::from("hash1") => Cow::Borrowed("xx"), + }, + }; + let error = checksums.get::>("hash1").unwrap_err(); + assert_eq!(FromHexError::InvalidHexCharacter { c: 'x', index: 0 }, error); + } + + #[test] + fn get_when_unset_returns_none() { + let checksums = Checksum::default(); + assert_eq!(None, checksums.get::>("hash1").unwrap().as_deref()); + } + + #[test] + fn algorithms_returns_algorithms() { + let checksums = Checksum { + algorithms: hashmap! { + SmallString::from("hash1") => Cow::Borrowed("xx"), + }, + }; + let algorithms: Vec<_> = checksums.algorithms().collect(); + assert_eq!(&["hash1"], &algorithms[..]); + } + + #[test] + fn insert_raw_when_already_set_replaces() { + let mut checksums = Checksum { + algorithms: hashmap! { + SmallString::from("hash1") => Cow::Borrowed("xx"), + }, + }; + checksums.insert_raw("hash1", "yy".to_owned()); + assert_eq!(Some("yy"), checksums.get_raw("hash1")); + } + + #[test] + fn insert_raw_when_already_set_with_different_case_replaces() { + let mut checksums = Checksum { + algorithms: hashmap! 
{ + SmallString::from("hash1") => Cow::Borrowed("xx"), + }, + }; + checksums.insert_raw("HASH1", "yy".to_owned()); + assert_eq!(Some("yy"), checksums.get_raw("hash1")); + } + + #[test] + fn insert_raw_when_not_set_inserts() { + let mut checksums = Checksum::default(); + checksums.insert_raw("hash1", "yy".to_owned()); + assert_eq!(Some("yy"), checksums.get_raw("hash1")); + } + + #[test] + fn insert_raw_lowercases_algorithm() { + let mut checksums = Checksum::default(); + checksums.insert_raw("HASH1", "yy".to_owned()); + assert_eq!(Some("yy"), checksums.get_raw("hash1")); + } + + #[test] + fn insert_inserts_encoded() { + let mut checksums = Checksum::default(); + checksums.insert("hash1", "\x00\x01\x02"); + assert_eq!(Some("000102"), checksums.get_raw("hash1")); + } + + #[test] + fn remove_removes() { + let mut checksums = Checksum { + algorithms: hashmap! { + SmallString::from("hash1") => Cow::Borrowed("xx"), + }, + }; + checksums.remove("hash1"); + assert_eq!(None, checksums.get_raw("hash1")); + } + + #[test] + fn try_from_str_when_valid_parses() { + // This is valid enough for parsing. + let checksums = Checksum::try_from("HASH1:0,hash0:x").unwrap(); + assert_eq!(Some("0"), checksums.get_raw("hash1")); + assert_eq!(Some("x"), checksums.get_raw("hash0")); + } + + #[test] + fn try_from_str_when_invalid_returns_error() { + let error = Checksum::try_from(",").unwrap_err(); + assert!(matches!(error, ParseError::InvalidQualifier)); + } + + #[test] + fn try_from_str_when_algorithm_is_duplicated_returns_error() { + let error = Checksum::try_from("hash1:00,hash1:00").unwrap_err(); + assert!(matches!(error, ParseError::InvalidQualifier)); + } + + #[test] + fn try_into_str_when_non_hex_returns_error() { + let checksums = Checksum { + algorithms: hashmap! 
{ + SmallString::from("hash1") => Cow::Borrowed("xx"), + }, + }; + let error = SmallString::try_from(checksums).unwrap_err(); + assert!(matches!(error, ParseError::InvalidQualifier)); + } + + #[test] + fn try_into_str_when_partial_byte_returns_error() { + let checksums = Checksum { + algorithms: hashmap! { + SmallString::from("hash1") => Cow::Borrowed("0"), + }, + }; + let error = SmallString::try_from(checksums).unwrap_err(); + assert!(matches!(error, ParseError::InvalidQualifier)); + } + + #[test] + fn try_into_str_returns_algorithms_in_order_with_lowercase_hex_bytes() { + let mut expected = SmallString::default(); + for i in 0..10 { + if !expected.is_empty() { + expected.push(','); + } + write!(expected, "hash{i}:{i:02x}").unwrap(); + } + + let mut checksums = Checksum::default(); + for i in (0..10u8).rev() { + checksums.insert(&format!("HASH{i}"), [i]); + } + let actual = SmallString::try_from(checksums).unwrap(); + + assert_eq!(expected, actual); + } + } +} diff --git a/purl/src/qualifiers/well_known/maven.rs b/purl/src/qualifiers/well_known/maven.rs new file mode 100644 index 0000000..cb3fa1d --- /dev/null +++ b/purl/src/qualifiers/well_known/maven.rs @@ -0,0 +1,6 @@ +//! Known qualifier types for Maven. + +use super::str_ref_qualifier; + +str_ref_qualifier!(Classifier, "classifier", "classifier"); +str_ref_qualifier!(Type, "type", "type"); diff --git a/purl_test/Cargo.toml b/purl_test/Cargo.toml new file mode 100644 index 0000000..6471a3e --- /dev/null +++ b/purl_test/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "purl_test" +version = "0.1.0" +edition = "2021" +publish = false + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +phylum-purl = { path = "../purl" } diff --git a/purl_test/src/lib.rs b/purl_test/src/lib.rs new file mode 100644 index 0000000..9b78e26 --- /dev/null +++ b/purl_test/src/lib.rs @@ -0,0 +1,742 @@ +// This file is autogenerated by generate_tests.rs. 
+// Use `cargo xtask codegen` to regenerate it. +#![cfg(test)] + +use std::collections::HashMap; +use std::str::FromStr; + +use phylum_purl::{PackageError, PackageType, Purl}; +#[test] +/// valid maven purl +fn valid_maven_purl() { + let parsed = match Purl::from_str("pkg:maven/org.apache.commons/io@1.3.4") { + Ok(purl) => purl, + Err(error) => { + panic!( + "Failed to parse valid purl {:?}: {}", + "pkg:maven/org.apache.commons/io@1.3.4", error + ) + }, + }; + assert_eq!(&PackageType::Maven, parsed.package_type(), "Incorrect package type"); + assert_eq!(Some("org.apache.commons"), parsed.namespace(), "Incorrect namespace"); + assert_eq!("io", parsed.name(), "Incorrect name"); + assert_eq!(Some("1.3.4"), parsed.version(), "Incorrect version"); + assert_eq!(None, parsed.subpath(), "Incorrect subpath"); + let expected_qualifiers: HashMap<&str, &str> = HashMap::new(); + assert_eq!( + expected_qualifiers, + parsed.qualifiers().iter().map(|(k, v)| (k.as_str(), v)).collect::>() + ); + assert_eq!( + "pkg:maven/org.apache.commons/io@1.3.4", + &parsed.to_string(), + "Incorrect string representation" + ); +} +#[test] +/// basic valid maven purl without version +fn basic_valid_maven_purl_without_version() { + let parsed = match Purl::from_str("pkg:maven/org.apache.commons/io") { + Ok(purl) => purl, + Err(error) => { + panic!("Failed to parse valid purl {:?}: {}", "pkg:maven/org.apache.commons/io", error) + }, + }; + assert_eq!(&PackageType::Maven, parsed.package_type(), "Incorrect package type"); + assert_eq!(Some("org.apache.commons"), parsed.namespace(), "Incorrect namespace"); + assert_eq!("io", parsed.name(), "Incorrect name"); + assert_eq!(None, parsed.version(), "Incorrect version"); + assert_eq!(None, parsed.subpath(), "Incorrect subpath"); + let expected_qualifiers: HashMap<&str, &str> = HashMap::new(); + assert_eq!( + expected_qualifiers, + parsed.qualifiers().iter().map(|(k, v)| (k.as_str(), v)).collect::>() + ); + assert_eq!( + "pkg:maven/org.apache.commons/io", + 
&parsed.to_string(), + "Incorrect string representation" + ); +} +#[test] +/// valid go purl without version and with subpath +fn valid_go_purl_without_version_and_with_subpath() { + let parsed = match Purl::from_str( + "pkg:GOLANG/google.golang.org/genproto#/googleapis/api/annotations/", + ) { + Ok(purl) => purl, + Err(error) => { + panic!( + "Failed to parse valid purl {:?}: {}", + "pkg:GOLANG/google.golang.org/genproto#/googleapis/api/annotations/", error + ) + }, + }; + assert_eq!(&PackageType::Golang, parsed.package_type(), "Incorrect package type"); + assert_eq!(Some("google.golang.org"), parsed.namespace(), "Incorrect namespace"); + assert_eq!("genproto", parsed.name(), "Incorrect name"); + assert_eq!(None, parsed.version(), "Incorrect version"); + assert_eq!(Some("googleapis/api/annotations"), parsed.subpath(), "Incorrect subpath"); + let expected_qualifiers: HashMap<&str, &str> = HashMap::new(); + assert_eq!( + expected_qualifiers, + parsed.qualifiers().iter().map(|(k, v)| (k.as_str(), v)).collect::>() + ); + assert_eq!( + "pkg:golang/google.golang.org/genproto#googleapis/api/annotations", + &parsed.to_string(), + "Incorrect string representation" + ); +} +#[test] +/// valid go purl with version and subpath +fn valid_go_purl_with_version_and_subpath() { + let parsed = match Purl::from_str( + "pkg:GOLANG/google.golang.org/genproto@abcdedf#/googleapis/api/annotations/", + ) { + Ok(purl) => purl, + Err(error) => { + panic!( + "Failed to parse valid purl {:?}: {}", + "pkg:GOLANG/google.golang.org/genproto@abcdedf#/googleapis/api/annotations/", error + ) + }, + }; + assert_eq!(&PackageType::Golang, parsed.package_type(), "Incorrect package type"); + assert_eq!(Some("google.golang.org"), parsed.namespace(), "Incorrect namespace"); + assert_eq!("genproto", parsed.name(), "Incorrect name"); + assert_eq!(Some("abcdedf"), parsed.version(), "Incorrect version"); + assert_eq!(Some("googleapis/api/annotations"), parsed.subpath(), "Incorrect subpath"); + let 
expected_qualifiers: HashMap<&str, &str> = HashMap::new(); + assert_eq!( + expected_qualifiers, + parsed.qualifiers().iter().map(|(k, v)| (k.as_str(), v)).collect::>() + ); + assert_eq!( + "pkg:golang/google.golang.org/genproto@abcdedf#googleapis/api/annotations", + &parsed.to_string(), + "Incorrect string representation" + ); +} +#[test] +/// unsupported: bitbucket namespace and name should be lowercased +fn unsupported_bitbucket_namespace_and_name_should_be_lowercased() { + assert!( + matches!( + Purl::from_str("pkg:bitbucket/birKenfeld/pyGments-main@244fd47e07d1014f0aed9c"), + Err(PackageError::UnsupportedType) + ), + "Type {} is not supported", + "bitbucket" + ); +} +#[test] +/// unsupported: github namespace and name should be lowercased +fn unsupported_github_namespace_and_name_should_be_lowercased() { + assert!( + matches!( + Purl::from_str("pkg:github/Package-url/purl-Spec@244fd47e07d1004f0aed9c"), + Err(PackageError::UnsupportedType) + ), + "Type {} is not supported", + "github" + ); +} +#[test] +/// unsupported: debian can use qualifiers +fn unsupported_debian_can_use_qualifiers() { + assert!( + matches!( + Purl::from_str("pkg:deb/debian/curl@7.50.3-1?arch=i386&distro=jessie"), + Err(PackageError::UnsupportedType) + ), + "Type {} is not supported", + "deb" + ); +} +#[test] +/// unsupported: docker uses qualifiers and hash image id as versions +fn unsupported_docker_uses_qualifiers_and_hash_image_id_as_versions() { + assert!( + matches!( + Purl::from_str( + "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.\ + io" + ), + Err(PackageError::UnsupportedType) + ), + "Type {} is not supported", + "docker" + ); +} +#[test] +/// Java gem can use a qualifier +fn java_gem_can_use_a_qualifier() { + let parsed = match Purl::from_str("pkg:gem/jruby-launcher@1.1.2?Platform=java") { + Ok(purl) => purl, + Err(error) => { + panic!( + "Failed to parse valid purl {:?}: {}", + "pkg:gem/jruby-launcher@1.1.2?Platform=java", error + ) + }, + }; + 
assert_eq!(&PackageType::Gem, parsed.package_type(), "Incorrect package type"); + assert_eq!(None, parsed.namespace(), "Incorrect namespace"); + assert_eq!("jruby-launcher", parsed.name(), "Incorrect name"); + assert_eq!(Some("1.1.2"), parsed.version(), "Incorrect version"); + assert_eq!(None, parsed.subpath(), "Incorrect subpath"); + let expected_qualifiers: HashMap<&str, &str> = [("platform", "java")].into_iter().collect(); + assert_eq!( + expected_qualifiers, + parsed.qualifiers().iter().map(|(k, v)| (k.as_str(), v)).collect::>() + ); + assert_eq!( + "pkg:gem/jruby-launcher@1.1.2?platform=java", + &parsed.to_string(), + "Incorrect string representation" + ); +} +#[test] +/// maven often uses qualifiers +fn maven_often_uses_qualifiers() { + let parsed = match Purl::from_str( + "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repositorY_url=repo.\ + spring.io/release", + ) { + Ok(purl) => purl, + Err(error) => { + panic!( + "Failed to parse valid purl {:?}: {}", + "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&\ + repositorY_url=repo.spring.io/release", + error + ) + }, + }; + assert_eq!(&PackageType::Maven, parsed.package_type(), "Incorrect package type"); + assert_eq!(Some("org.apache.xmlgraphics"), parsed.namespace(), "Incorrect namespace"); + assert_eq!("batik-anim", parsed.name(), "Incorrect name"); + assert_eq!(Some("1.9.1"), parsed.version(), "Incorrect version"); + assert_eq!(None, parsed.subpath(), "Incorrect subpath"); + let expected_qualifiers: HashMap<&str, &str> = + [("classifier", "sources"), ("repository_url", "repo.spring.io/release")] + .into_iter() + .collect(); + assert_eq!( + expected_qualifiers, + parsed.qualifiers().iter().map(|(k, v)| (k.as_str(), v)).collect::>() + ); + assert_eq!( + "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repository_url=repo.\ + spring.io/release", + &parsed.to_string(), + "Incorrect string representation" + ); +} +#[test] +/// maven pom reference 
+fn maven_pom_reference() { + let parsed = match Purl::from_str( + "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repositorY_url=repo.\ + spring.io/release", + ) { + Ok(purl) => purl, + Err(error) => { + panic!( + "Failed to parse valid purl {:?}: {}", + "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&\ + repositorY_url=repo.spring.io/release", + error + ) + }, + }; + assert_eq!(&PackageType::Maven, parsed.package_type(), "Incorrect package type"); + assert_eq!(Some("org.apache.xmlgraphics"), parsed.namespace(), "Incorrect namespace"); + assert_eq!("batik-anim", parsed.name(), "Incorrect name"); + assert_eq!(Some("1.9.1"), parsed.version(), "Incorrect version"); + assert_eq!(None, parsed.subpath(), "Incorrect subpath"); + let expected_qualifiers: HashMap<&str, &str> = + [("extension", "pom"), ("repository_url", "repo.spring.io/release")].into_iter().collect(); + assert_eq!( + expected_qualifiers, + parsed.qualifiers().iter().map(|(k, v)| (k.as_str(), v)).collect::>() + ); + assert_eq!( + "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repository_url=repo.\ + spring.io/release", + &parsed.to_string(), + "Incorrect string representation" + ); +} +#[test] +/// maven can come with a type qualifier +fn maven_can_come_with_a_type_qualifier() { + let parsed = + match Purl::from_str("pkg:Maven/net.sf.jacob-project/jacob@1.14.3?classifier=x86&type=dll") + { + Ok(purl) => purl, + Err(error) => { + panic!( + "Failed to parse valid purl {:?}: {}", + "pkg:Maven/net.sf.jacob-project/jacob@1.14.3?classifier=x86&type=dll", error + ) + }, + }; + assert_eq!(&PackageType::Maven, parsed.package_type(), "Incorrect package type"); + assert_eq!(Some("net.sf.jacob-project"), parsed.namespace(), "Incorrect namespace"); + assert_eq!("jacob", parsed.name(), "Incorrect name"); + assert_eq!(Some("1.14.3"), parsed.version(), "Incorrect version"); + assert_eq!(None, parsed.subpath(), "Incorrect subpath"); + let expected_qualifiers: 
HashMap<&str, &str> = + [("classifier", "x86"), ("type", "dll")].into_iter().collect(); + assert_eq!( + expected_qualifiers, + parsed.qualifiers().iter().map(|(k, v)| (k.as_str(), v)).collect::>() + ); + assert_eq!( + "pkg:maven/net.sf.jacob-project/jacob@1.14.3?classifier=x86&type=dll", + &parsed.to_string(), + "Incorrect string representation" + ); +} +#[test] +/// npm can be scoped +fn npm_can_be_scoped() { + let parsed = match Purl::from_str("pkg:npm/%40angular/animation@12.3.1") { + Ok(purl) => purl, + Err(error) => { + panic!( + "Failed to parse valid purl {:?}: {}", + "pkg:npm/%40angular/animation@12.3.1", error + ) + }, + }; + assert_eq!(&PackageType::Npm, parsed.package_type(), "Incorrect package type"); + assert_eq!(Some("@angular"), parsed.namespace(), "Incorrect namespace"); + assert_eq!("animation", parsed.name(), "Incorrect name"); + assert_eq!(Some("12.3.1"), parsed.version(), "Incorrect version"); + assert_eq!(None, parsed.subpath(), "Incorrect subpath"); + let expected_qualifiers: HashMap<&str, &str> = HashMap::new(); + assert_eq!( + expected_qualifiers, + parsed.qualifiers().iter().map(|(k, v)| (k.as_str(), v)).collect::>() + ); + assert_eq!( + "pkg:npm/%40angular/animation@12.3.1", + &parsed.to_string(), + "Incorrect string representation" + ); +} +#[test] +/// pypi names have special rules and not case sensitive +fn pypi_names_have_special_rules_and_not_case_sensitive() { + let parsed = match Purl::from_str("pkg:PYPI/Django_package@1.11.1.dev1") { + Ok(purl) => purl, + Err(error) => { + panic!( + "Failed to parse valid purl {:?}: {}", + "pkg:PYPI/Django_package@1.11.1.dev1", error + ) + }, + }; + assert_eq!(&PackageType::PyPI, parsed.package_type(), "Incorrect package type"); + assert_eq!(None, parsed.namespace(), "Incorrect namespace"); + assert_eq!("django-package", parsed.name(), "Incorrect name"); + assert_eq!(Some("1.11.1.dev1"), parsed.version(), "Incorrect version"); + assert_eq!(None, parsed.subpath(), "Incorrect subpath"); + let 
expected_qualifiers: HashMap<&str, &str> = HashMap::new(); + assert_eq!( + expected_qualifiers, + parsed.qualifiers().iter().map(|(k, v)| (k.as_str(), v)).collect::>() + ); + assert_eq!( + "pkg:pypi/django-package@1.11.1.dev1", + &parsed.to_string(), + "Incorrect string representation" + ); +} +#[test] +/// unsupported: rpm often use qualifiers +fn unsupported_rpm_often_use_qualifiers() { + assert!( + matches!( + Purl::from_str("pkg:Rpm/fedora/curl@7.50.3-1.fc25?Arch=i386&Distro=fedora-25"), + Err(PackageError::UnsupportedType) + ), + "Type {} is not supported", + "rpm" + ); +} +#[test] +/// a scheme is always required +fn a_scheme_is_always_required() { + assert!( + Purl::from_str("EnterpriseLibrary.Common@6.0.1304").is_err(), + "{}", + "a scheme is always required" + ); +} +#[test] +/// a type is always required +fn a_type_is_always_required() { + assert!( + Purl::from_str("pkg:EnterpriseLibrary.Common@6.0.1304").is_err(), + "{}", + "a type is always required" + ); +} +#[test] +/// a name is required +fn a_name_is_required() { + assert!(Purl::from_str("pkg:maven/@1.3.4").is_err(), "{}", "a name is required"); +} +#[test] +/// slash / after scheme is not significant +fn slash_after_scheme_is_not_significant() { + let parsed = match Purl::from_str("pkg:/maven/org.apache.commons/io") { + Ok(purl) => purl, + Err(error) => { + panic!("Failed to parse valid purl {:?}: {}", "pkg:/maven/org.apache.commons/io", error) + }, + }; + assert_eq!(&PackageType::Maven, parsed.package_type(), "Incorrect package type"); + assert_eq!(Some("org.apache.commons"), parsed.namespace(), "Incorrect namespace"); + assert_eq!("io", parsed.name(), "Incorrect name"); + assert_eq!(None, parsed.version(), "Incorrect version"); + assert_eq!(None, parsed.subpath(), "Incorrect subpath"); + let expected_qualifiers: HashMap<&str, &str> = HashMap::new(); + assert_eq!( + expected_qualifiers, + parsed.qualifiers().iter().map(|(k, v)| (k.as_str(), v)).collect::>() + ); + assert_eq!( + 
"pkg:maven/org.apache.commons/io", + &parsed.to_string(), + "Incorrect string representation" + ); +} +#[test] +/// double slash // after scheme is not significant +fn double_slash_after_scheme_is_not_significant() { + let parsed = match Purl::from_str("pkg://maven/org.apache.commons/io") { + Ok(purl) => purl, + Err(error) => { + panic!( + "Failed to parse valid purl {:?}: {}", + "pkg://maven/org.apache.commons/io", error + ) + }, + }; + assert_eq!(&PackageType::Maven, parsed.package_type(), "Incorrect package type"); + assert_eq!(Some("org.apache.commons"), parsed.namespace(), "Incorrect namespace"); + assert_eq!("io", parsed.name(), "Incorrect name"); + assert_eq!(None, parsed.version(), "Incorrect version"); + assert_eq!(None, parsed.subpath(), "Incorrect subpath"); + let expected_qualifiers: HashMap<&str, &str> = HashMap::new(); + assert_eq!( + expected_qualifiers, + parsed.qualifiers().iter().map(|(k, v)| (k.as_str(), v)).collect::>() + ); + assert_eq!( + "pkg:maven/org.apache.commons/io", + &parsed.to_string(), + "Incorrect string representation" + ); +} +#[test] +/// slash /// after type is not significant +fn slash_after_type_is_not_significant() { + let parsed = match Purl::from_str("pkg:///maven/org.apache.commons/io") { + Ok(purl) => purl, + Err(error) => { + panic!( + "Failed to parse valid purl {:?}: {}", + "pkg:///maven/org.apache.commons/io", error + ) + }, + }; + assert_eq!(&PackageType::Maven, parsed.package_type(), "Incorrect package type"); + assert_eq!(Some("org.apache.commons"), parsed.namespace(), "Incorrect namespace"); + assert_eq!("io", parsed.name(), "Incorrect name"); + assert_eq!(None, parsed.version(), "Incorrect version"); + assert_eq!(None, parsed.subpath(), "Incorrect subpath"); + let expected_qualifiers: HashMap<&str, &str> = HashMap::new(); + assert_eq!( + expected_qualifiers, + parsed.qualifiers().iter().map(|(k, v)| (k.as_str(), v)).collect::>() + ); + assert_eq!( + "pkg:maven/org.apache.commons/io", + &parsed.to_string(), + 
"Incorrect string representation" + ); +} +#[test] +/// valid maven purl with case sensitive namespace and name +fn valid_maven_purl_with_case_sensitive_namespace_and_name() { + let parsed = match Purl::from_str("pkg:maven/HTTPClient/HTTPClient@0.3-3") { + Ok(purl) => purl, + Err(error) => { + panic!( + "Failed to parse valid purl {:?}: {}", + "pkg:maven/HTTPClient/HTTPClient@0.3-3", error + ) + }, + }; + assert_eq!(&PackageType::Maven, parsed.package_type(), "Incorrect package type"); + assert_eq!(Some("HTTPClient"), parsed.namespace(), "Incorrect namespace"); + assert_eq!("HTTPClient", parsed.name(), "Incorrect name"); + assert_eq!(Some("0.3-3"), parsed.version(), "Incorrect version"); + assert_eq!(None, parsed.subpath(), "Incorrect subpath"); + let expected_qualifiers: HashMap<&str, &str> = HashMap::new(); + assert_eq!( + expected_qualifiers, + parsed.qualifiers().iter().map(|(k, v)| (k.as_str(), v)).collect::>() + ); + assert_eq!( + "pkg:maven/HTTPClient/HTTPClient@0.3-3", + &parsed.to_string(), + "Incorrect string representation" + ); +} +#[test] +/// valid maven purl containing a space in the version and qualifier +fn valid_maven_purl_containing_a_space_in_the_version_and_qualifier() { + let parsed = match Purl::from_str("pkg:maven/mygroup/myartifact@1.0.0%20Final?mykey=my%20value") + { + Ok(purl) => purl, + Err(error) => { + panic!( + "Failed to parse valid purl {:?}: {}", + "pkg:maven/mygroup/myartifact@1.0.0%20Final?mykey=my%20value", error + ) + }, + }; + assert_eq!(&PackageType::Maven, parsed.package_type(), "Incorrect package type"); + assert_eq!(Some("mygroup"), parsed.namespace(), "Incorrect namespace"); + assert_eq!("myartifact", parsed.name(), "Incorrect name"); + assert_eq!(Some("1.0.0 Final"), parsed.version(), "Incorrect version"); + assert_eq!(None, parsed.subpath(), "Incorrect subpath"); + let expected_qualifiers: HashMap<&str, &str> = [("mykey", "my value")].into_iter().collect(); + assert_eq!( + expected_qualifiers, + 
parsed.qualifiers().iter().map(|(k, v)| (k.as_str(), v)).collect::>() + ); + assert_eq!( + "pkg:maven/mygroup/myartifact@1.0.0%20Final?mykey=my%20value", + &parsed.to_string(), + "Incorrect string representation" + ); +} +#[test] +/// checks for invalid qualifier keys +fn checks_for_invalid_qualifier_keys() { + assert!( + Purl::from_str("pkg:npm/myartifact@1.0.0?in%20production=true").is_err(), + "{}", + "checks for invalid qualifier keys" + ); +} +#[test] +/// unsupported: valid conan purl +fn unsupported_valid_conan_purl() { + assert!( + matches!(Purl::from_str("pkg:conan/cctz@2.3"), Err(PackageError::UnsupportedType)), + "Type {} is not supported", + "conan" + ); +} +#[test] +/// unsupported: valid conan purl with namespace and qualifier channel +fn unsupported_valid_conan_purl_with_namespace_and_qualifier_channel() { + assert!( + matches!( + Purl::from_str("pkg:conan/bincrafters/cctz@2.3?channel=stable"), + Err(PackageError::UnsupportedType) + ), + "Type {} is not supported", + "conan" + ); +} +#[test] +/// invalid conan purl only namespace +fn invalid_conan_purl_only_namespace() { + assert!( + Purl::from_str("pkg:conan/bincrafters/cctz@2.3").is_err(), + "{}", + "invalid conan purl only namespace" + ); +} +#[test] +/// invalid conan purl only channel qualifier +fn invalid_conan_purl_only_channel_qualifier() { + assert!( + Purl::from_str("pkg:conan/cctz@2.3?channel=stable").is_err(), + "{}", + "invalid conan purl only channel qualifier" + ); +} +#[test] +/// unsupported: valid conda purl with qualifiers +fn unsupported_valid_conda_purl_with_qualifiers() { + assert!( + matches!( + Purl::from_str( + "pkg:conda/absl-py@0.4.1?build=py36h06a4308_0&channel=main&subdir=linux-64&\ + type=tar.bz2" + ), + Err(PackageError::UnsupportedType) + ), + "Type {} is not supported", + "conda" + ); +} +#[test] +/// unsupported: valid cran purl +fn unsupported_valid_cran_purl() { + assert!( + matches!(Purl::from_str("pkg:cran/A3@0.9.1"), Err(PackageError::UnsupportedType)), + "Type 
{} is not supported", + "cran" + ); +} +#[test] +/// invalid cran purl without name +fn invalid_cran_purl_without_name() { + assert!(Purl::from_str("pkg:cran/@0.9.1").is_err(), "{}", "invalid cran purl without name"); +} +#[test] +/// invalid cran purl without version +fn invalid_cran_purl_without_version() { + assert!(Purl::from_str("pkg:cran/A3").is_err(), "{}", "invalid cran purl without version"); +} +#[test] +/// unsupported: valid swift purl +fn unsupported_valid_swift_purl() { + assert!( + matches!( + Purl::from_str("pkg:swift/github.com/Alamofire/Alamofire@5.4.3"), + Err(PackageError::UnsupportedType) + ), + "Type {} is not supported", + "swift" + ); +} +#[test] +/// invalid swift purl without namespace +fn invalid_swift_purl_without_namespace() { + assert!( + Purl::from_str("pkg:swift/Alamofire@5.4.3").is_err(), + "{}", + "invalid swift purl without namespace" + ); +} +#[test] +/// invalid swift purl without name +fn invalid_swift_purl_without_name() { + assert!( + Purl::from_str("pkg:swift/github.com/Alamofire/@5.4.3").is_err(), + "{}", + "invalid swift purl without name" + ); +} +#[test] +/// invalid swift purl without version +fn invalid_swift_purl_without_version() { + assert!( + Purl::from_str("pkg:swift/github.com/Alamofire/Alamofire").is_err(), + "{}", + "invalid swift purl without version" + ); +} +#[test] +/// unsupported: valid hackage purl +fn unsupported_valid_hackage_purl() { + assert!( + matches!( + Purl::from_str("pkg:hackage/AC-HalfInteger@1.2.1"), + Err(PackageError::UnsupportedType) + ), + "Type {} is not supported", + "hackage" + ); +} +#[test] +/// name and version are always required +fn name_and_version_are_always_required() { + assert!(Purl::from_str("pkg:hackage").is_err(), "{}", "name and version are always required"); +} +#[test] +/// unsupported: minimal Hugging Face model +fn unsupported_minimal_hugging_face_model() { + assert!( + matches!( + Purl::from_str( + 
"pkg:huggingface/distilbert-base-uncased@043235d6088ecd3dd5fb5ca3592b6913fd516027" + ), + Err(PackageError::UnsupportedType) + ), + "Type {} is not supported", + "huggingface" + ); +} +#[test] +/// unsupported: Hugging Face model with staging endpoint +fn unsupported_hugging_face_model_with_staging_endpoint() { + assert!( + matches!(Purl::from_str("pkg:huggingface/microsoft/deberta-v3-base@559062ad13d311b87b2c455e67dcd5f1c8f65111?repository_url=https://hub-ci.huggingface.co"), + Err(PackageError::UnsupportedType)), "Type {} is not supported", "huggingface" + ); +} +#[test] +/// unsupported: Hugging Face model with various cases +fn unsupported_hugging_face_model_with_various_cases() { + assert!( + matches!( + Purl::from_str( + "pkg:huggingface/EleutherAI/gpt-neo-1.3B@797174552AE47F449AB70B684CABCB6603E5E85E" + ), + Err(PackageError::UnsupportedType) + ), + "Type {} is not supported", + "huggingface" + ); +} +#[test] +/// unsupported: MLflow model tracked in Azure Databricks (case insensitive) +fn unsupported_m_lflow_model_tracked_in_azure_databricks_case_insensitive_() { + assert!( + matches!(Purl::from_str("pkg:mlflow/CreditFraud@3?repository_url=https://adb-5245952564735461.0.azuredatabricks.net/api/2.0/mlflow"), + Err(PackageError::UnsupportedType)), "Type {} is not supported", "mlflow" + ); +} +#[test] +/// unsupported: MLflow model tracked in Azure ML (case sensitive) +fn unsupported_m_lflow_model_tracked_in_azure_ml_case_sensitive_() { + assert!( + matches!(Purl::from_str("pkg:mlflow/CreditFraud@3?repository_url=https://westus2.api.azureml.ms/mlflow/v1.0/subscriptions/a50f2011-fab8-4164-af23-c62881ef8c95/resourceGroups/TestResourceGroup/providers/Microsoft.MachineLearningServices/workspaces/TestWorkspace"), + Err(PackageError::UnsupportedType)), "Type {} is not supported", "mlflow" + ); +} +#[test] +/// unsupported: MLflow model with unique identifiers +fn unsupported_m_lflow_model_with_unique_identifiers() { + assert!( + 
matches!(Purl::from_str("pkg:mlflow/trafficsigns@10?model_uuid=36233173b22f4c89b451f1228d700d49&run_id=410a3121-2709-4f88-98dd-dba0ef056b0a&repository_url=https://adb-5245952564735461.0.azuredatabricks.net/api/2.0/mlflow"), + Err(PackageError::UnsupportedType)), "Type {} is not supported", "mlflow" + ); +} +#[test] +/// unsupported: composer names are not case sensitive +fn unsupported_composer_names_are_not_case_sensitive() { + assert!( + matches!( + Purl::from_str("pkg:composer/Laravel/Laravel@5.5.0"), + Err(PackageError::UnsupportedType) + ), + "Type {} is not supported", + "composer" + ); +} diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..4caaf83 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,15 @@ +format_code_in_doc_comments = true +group_imports = "StdExternalCrate" +match_block_trailing_comma = true +condense_wildcard_suffixes = true +use_field_init_shorthand = true +normalize_doc_attributes = true +overflow_delimited_expr = true +imports_granularity = "Module" +use_small_heuristics = "Max" +normalize_comments = true +reorder_impl_items = true +use_try_shorthand = true +newline_style = "Unix" +format_strings = true +wrap_comments = true diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index e7a11a9..0000000 --- a/src/main.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - println!("Hello, world!"); -} diff --git a/xtask/Cargo.toml b/xtask/Cargo.toml new file mode 100644 index 0000000..1ae4d67 --- /dev/null +++ b/xtask/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "xtask" +version = "0.1.0" +edition = "2021" +publish = false + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +clap = { version = "4.0.29", features = ["derive"] } +convert_case = "0.6.0" +lazy_static = "1.4.0" +prettyplease = "0.2.4" +proc-macro2 = "1.0.47" +phylum-purl = { path = "../purl" } +quote = "1.0.21" +regex = "1.7.0" +serde = { version = "1.0.150", features = ["derive"] } 
+serde_json = "1.0.89"
+syn = { version = "2.0.13", features = ["full"] }
diff --git a/xtask/src/generate_tests.rs b/xtask/src/generate_tests.rs
new file mode 100644
index 0000000..d91bc9e
--- /dev/null
+++ b/xtask/src/generate_tests.rs
@@ -0,0 +1,196 @@
+//! Generates `purl_test/src/lib.rs` from the PURL specification test suite.
+
+use std::collections::HashMap;
+use std::fs;
+use std::io::{BufWriter, Write};
+use std::str::FromStr;
+
+use convert_case::{Case, Casing};
+use lazy_static::lazy_static;
+use phylum_purl::PackageType;
+use proc_macro2::TokenStream;
+use quote::{format_ident, quote};
+use regex::Regex;
+use serde::Deserialize;
+use syn::parse_quote;
+
+use crate::workspace_dir;
+
+/// The PURL specification test suite, embedded at compile time.
+const TEST_SUITE_DATA: &str = include_str!("generate_tests/test-suite-data.json");
+/// Descriptions of test suite entries for which no test is generated.
+const BLACKLIST: &[&str] = &[
+    // NuGet package names are not case sensitive. package-url/purl-spec#226
+    "nuget names are case sensitive",
+];
+
+lazy_static! {
+    /// Matches a run of two or more underscores.
+    static ref UNDERSCORES: Regex = Regex::new("__+").unwrap();
+}
+
+/// One entry of the test suite. String fields borrow from the JSON input.
+#[derive(Deserialize)]
+struct Test<'a> {
+    description: &'a str,
+    purl: &'a str,
+    canonical_purl: Option<&'a str>,
+    r#type: Option<&'a str>,
+    namespace: Option<&'a str>,
+    name: Option<&'a str>,
+    version: Option<&'a str>,
+    qualifiers: Option<HashMap<&'a str, &'a str>>,
+    subpath: Option<&'a str>,
+    is_invalid: bool,
+}
+
+/// Regenerates `purl_test/src/lib.rs` from the embedded test suite.
+///
+/// Entries whose description is listed in [`BLACKLIST`] are skipped.
+///
+/// # Panics
+///
+/// Panics if the test suite cannot be deserialized or if the output file
+/// cannot be created, written, or flushed.
+pub fn main() {
+    let tests: Vec<Test> =
+        serde_json::from_str(TEST_SUITE_DATA).expect("Could not read test-suite-data.json");
+
+    let tests =
+        tests.into_iter().filter(|t| !BLACKLIST.contains(&t.description)).map(test_to_tokens);
+    let suite = parse_quote! {
+        use std::collections::HashMap;
+        use std::str::FromStr;
+        use phylum_purl::{PackageError, PackageType, Purl};
+
+        #(#tests)*
+    };
+
+    let file = fs::File::create(workspace_dir().join("purl_test/src/lib.rs"))
+        .expect("Could not create purl_test/src/lib.rs");
+    let mut file = BufWriter::new(file);
+
+    writeln!(file, "// This file is autogenerated by generate_tests.rs.").unwrap();
+    writeln!(file, "// Use `cargo xtask codegen` to regenerate it.").unwrap();
+    writeln!(file, "#![cfg(test)]").unwrap();
+    writeln!(file).unwrap();
+    writeln!(file, "{}", prettyplease::unparse(&suite)).unwrap();
+
+    // Dropping a `BufWriter` discards any error from its final flush, so flush
+    // explicitly to make sure that a failed write is reported.
+    file.flush().expect("Could not flush purl_test/src/lib.rs");
+}
+
+/// Converts one test suite entry into a `#[test]` function.
+///
+/// - Entries marked invalid must fail to parse.
+/// - Entries whose type parses as a [`PackageType`] must parse, match every
+///   expected component, and format back to the canonical PURL.
+/// - All other entries must fail with `PackageError::UnsupportedType`.
+///
+/// Always returns `Some`.
+fn test_to_tokens(test: Test) -> Option<TokenStream> {
+    let Test {
+        description,
+        purl,
+        canonical_purl,
+        r#type,
+        namespace,
+        name,
+        version,
+        qualifiers,
+        subpath,
+        is_invalid,
+    } = test;
+    // The function name is the snake-cased description with every
+    // non-alphanumeric character replaced by `_` and runs of `_` collapsed.
+    let test_name = format_ident!(
+        "{}",
+        UNDERSCORES.replace_all(
+            &description.to_case(Case::Snake).replace(|c: char| !c.is_alphanumeric(), "_"),
+            "_"
+        )
+    );
+    let parsed_type = r#type.and_then(|t| PackageType::from_str(t).ok());
+    Some(if is_invalid {
+        quote! {
+            #[test]
+            #[doc = #description]
+            fn #test_name() {
+                assert!(Purl::from_str(#purl).is_err(), "{}", #description);
+            }
+        }
+    } else if let Some(parsed_type) = parsed_type {
+        let parsed_type = type_to_tokens(parsed_type);
+        let name = name.expect("Valid test must have package name");
+        let namespace = option_to_tokens(namespace);
+        let version = option_to_tokens(version);
+        let subpath = option_to_tokens(subpath);
+        let qualifiers = qualifiers_to_tokens(qualifiers);
+
+        quote! {
+            #[test]
+            #[doc = #description]
+            fn #test_name() {
+                let parsed = match Purl::from_str(#purl) {
+                    Ok(purl) => purl,
+                    Err(error) => panic!("Failed to parse valid purl {:?}: {}", #purl, error),
+                };
+
+                assert_eq!(&#parsed_type, parsed.package_type(), "Incorrect package type");
+                assert_eq!(#namespace, parsed.namespace(), "Incorrect namespace");
+                assert_eq!(#name, parsed.name(), "Incorrect name");
+                assert_eq!(#version, parsed.version(), "Incorrect version");
+                assert_eq!(#subpath, parsed.subpath(), "Incorrect subpath");
+
+                let expected_qualifiers: HashMap<&str, &str> = #qualifiers;
+                assert_eq!(expected_qualifiers, parsed.qualifiers().iter().map(|(k, v)| (k.as_str(), v)).collect::<HashMap<_, _>>());
+
+                assert_eq!(#canonical_purl, &parsed.to_string(), "Incorrect string representation");
+            }
+        }
+    } else {
+        // For all the unsupported cases, we can at least ensure that we get the
+        // expected error.
+        let test_name = format_ident!("unsupported_{}", test_name);
+        let description = format!("unsupported: {}", description);
+        quote! {
+            #[test]
+            #[doc = #description]
+            fn #test_name() {
+                assert!(matches!(Purl::from_str(#purl), Err(PackageError::UnsupportedType)), "Type {} is not supported", #r#type);
+            }
+        }
+    })
+}
+
+/// Builds the `PackageType::Variant` path for `value`, using the `Debug`
+/// output of `value` as the variant name.
+fn type_to_tokens(value: PackageType) -> TokenStream {
+    let ident = format_ident!("{}", format!("{:?}", value));
+    quote! { PackageType::#ident }
+}
+
+/// Builds a `Some("...")` or `None` expression for `value`.
+fn option_to_tokens(value: Option<&str>) -> TokenStream {
+    if let Some(value) = value {
+        quote! { Some(#value) }
+    } else {
+        quote! { None }
+    }
+}
+
+/// Builds an expression that evaluates to the expected qualifier map.
+///
+/// Entries are sorted so that the generated code does not depend on `HashMap`
+/// iteration order.
+fn qualifiers_to_tokens(value: Option<HashMap<&str, &str>>) -> TokenStream {
+    let mut value: Vec<(&str, &str)> = value.unwrap_or_default().into_iter().collect();
+    value.sort_unstable();
+    if value.is_empty() {
+        quote! { HashMap::new() }
+    } else {
+        let entries = value.into_iter().map(|(k, v)| quote! { (#k, #v) });
+        quote! { [#(#entries),*].into_iter().collect() }
+    }
+}
diff --git a/xtask/src/generate_tests/README.md b/xtask/src/generate_tests/README.md
new file mode 100644
index 0000000..0ac9998
--- /dev/null
+++ b/xtask/src/generate_tests/README.md
@@ -0,0 +1,2 @@
+This is the test suite from the PURL spec repository:
+https://github.com/package-url/purl-spec/blob/master/test-suite-data.json
diff --git a/xtask/src/generate_tests/test-suite-data.json b/xtask/src/generate_tests/test-suite-data.json
new file mode 100644
index 0000000..a819fc8
--- /dev/null
+++ b/xtask/src/generate_tests/test-suite-data.json
@@ -0,0 +1,554 @@
+[
+  {
+    "description": "valid maven purl",
+    "purl": "pkg:maven/org.apache.commons/io@1.3.4",
+    "canonical_purl": "pkg:maven/org.apache.commons/io@1.3.4",
+    "type": "maven",
+    "namespace": "org.apache.commons",
+    "name": "io",
+    "version": "1.3.4",
+    "qualifiers": null,
+    "subpath": null,
+    "is_invalid": false
+  },
+  {
+    "description": "basic valid maven purl without version",
+    "purl": "pkg:maven/org.apache.commons/io",
+    "canonical_purl": "pkg:maven/org.apache.commons/io",
+    "type": "maven",
+    "namespace": "org.apache.commons",
+    "name": "io",
+    "version": null,
+    "qualifiers": null,
+    "subpath": null,
+    "is_invalid": false
+  },
+  {
+    "description": "valid go purl without version and with subpath",
+    "purl": "pkg:GOLANG/google.golang.org/genproto#/googleapis/api/annotations/",
+    "canonical_purl": "pkg:golang/google.golang.org/genproto#googleapis/api/annotations",
+    "type": "golang",
+    "namespace": "google.golang.org",
+    "name": "genproto",
+    "version": null,
+    "qualifiers": null,
+    "subpath": "googleapis/api/annotations",
+    "is_invalid": false
+  },
+  {
+    "description": "valid go purl with version and subpath",
+    "purl": "pkg:GOLANG/google.golang.org/genproto@abcdedf#/googleapis/api/annotations/",
+    "canonical_purl": "pkg:golang/google.golang.org/genproto@abcdedf#googleapis/api/annotations",
+    "type": "golang",
+    "namespace": "google.golang.org",
+    "name":
"genproto", + "version": "abcdedf", + "qualifiers": null, + "subpath": "googleapis/api/annotations", + "is_invalid": false + }, + { + "description": "bitbucket namespace and name should be lowercased", + "purl": "pkg:bitbucket/birKenfeld/pyGments-main@244fd47e07d1014f0aed9c", + "canonical_purl": "pkg:bitbucket/birkenfeld/pygments-main@244fd47e07d1014f0aed9c", + "type": "bitbucket", + "namespace": "birkenfeld", + "name": "pygments-main", + "version": "244fd47e07d1014f0aed9c", + "qualifiers": null, + "subpath": null, + "is_invalid": false + }, + { + "description": "github namespace and name should be lowercased", + "purl": "pkg:github/Package-url/purl-Spec@244fd47e07d1004f0aed9c", + "canonical_purl": "pkg:github/package-url/purl-spec@244fd47e07d1004f0aed9c", + "type": "github", + "namespace": "package-url", + "name": "purl-spec", + "version": "244fd47e07d1004f0aed9c", + "qualifiers": null, + "subpath": null, + "is_invalid": false + }, + { + "description": "debian can use qualifiers", + "purl": "pkg:deb/debian/curl@7.50.3-1?arch=i386&distro=jessie", + "canonical_purl": "pkg:deb/debian/curl@7.50.3-1?arch=i386&distro=jessie", + "type": "deb", + "namespace": "debian", + "name": "curl", + "version": "7.50.3-1", + "qualifiers": {"arch": "i386", "distro": "jessie"}, + "subpath": null, + "is_invalid": false + }, + { + "description": "docker uses qualifiers and hash image id as versions", + "purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io", + "canonical_purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io", + "type": "docker", + "namespace": "customer", + "name": "dockerimage", + "version": "sha256:244fd47e07d1004f0aed9c", + "qualifiers": {"repository_url": "gcr.io"}, + "subpath": null, + "is_invalid": false + }, + { + "description": "Java gem can use a qualifier", + "purl": "pkg:gem/jruby-launcher@1.1.2?Platform=java", + "canonical_purl": "pkg:gem/jruby-launcher@1.1.2?platform=java", + "type": 
"gem", + "namespace": null, + "name": "jruby-launcher", + "version": "1.1.2", + "qualifiers": {"platform": "java"}, + "subpath": null, + "is_invalid": false + }, + { + "description": "maven often uses qualifiers", + "purl": "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repositorY_url=repo.spring.io/release", + "canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repository_url=repo.spring.io/release", + "type": "maven", + "namespace": "org.apache.xmlgraphics", + "name": "batik-anim", + "version": "1.9.1", + "qualifiers": {"classifier": "sources", "repository_url": "repo.spring.io/release"}, + "subpath": null, + "is_invalid": false + }, + { + "description": "maven pom reference", + "purl": "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repositorY_url=repo.spring.io/release", + "canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repository_url=repo.spring.io/release", + "type": "maven", + "namespace": "org.apache.xmlgraphics", + "name": "batik-anim", + "version": "1.9.1", + "qualifiers": {"extension": "pom", "repository_url": "repo.spring.io/release"}, + "subpath": null, + "is_invalid": false + }, + { + "description": "maven can come with a type qualifier", + "purl": "pkg:Maven/net.sf.jacob-project/jacob@1.14.3?classifier=x86&type=dll", + "canonical_purl": "pkg:maven/net.sf.jacob-project/jacob@1.14.3?classifier=x86&type=dll", + "type": "maven", + "namespace": "net.sf.jacob-project", + "name": "jacob", + "version": "1.14.3", + "qualifiers": {"classifier": "x86", "type": "dll"}, + "subpath": null, + "is_invalid": false + }, + { + "description": "npm can be scoped", + "purl": "pkg:npm/%40angular/animation@12.3.1", + "canonical_purl": "pkg:npm/%40angular/animation@12.3.1", + "type": "npm", + "namespace": "@angular", + "name": "animation", + "version": "12.3.1", + "qualifiers": null, + "subpath": null, + "is_invalid": false + }, + { + "description": "nuget names are 
case sensitive", + "purl": "pkg:Nuget/EnterpriseLibrary.Common@6.0.1304", + "canonical_purl": "pkg:nuget/EnterpriseLibrary.Common@6.0.1304", + "type": "nuget", + "namespace": null, + "name": "EnterpriseLibrary.Common", + "version": "6.0.1304", + "qualifiers": null, + "subpath": null, + "is_invalid": false + }, + { + "description": "pypi names have special rules and not case sensitive", + "purl": "pkg:PYPI/Django_package@1.11.1.dev1", + "canonical_purl": "pkg:pypi/django-package@1.11.1.dev1", + "type": "pypi", + "namespace": null, + "name": "django-package", + "version": "1.11.1.dev1", + "qualifiers": null, + "subpath": null, + "is_invalid": false + }, + { + "description": "rpm often use qualifiers", + "purl": "pkg:Rpm/fedora/curl@7.50.3-1.fc25?Arch=i386&Distro=fedora-25", + "canonical_purl": "pkg:rpm/fedora/curl@7.50.3-1.fc25?arch=i386&distro=fedora-25", + "type": "rpm", + "namespace": "fedora", + "name": "curl", + "version": "7.50.3-1.fc25", + "qualifiers": {"arch": "i386", "distro": "fedora-25"}, + "subpath": null, + "is_invalid": false + }, + { + "description": "a scheme is always required", + "purl": "EnterpriseLibrary.Common@6.0.1304", + "canonical_purl": "EnterpriseLibrary.Common@6.0.1304", + "type": null, + "namespace": null, + "name": "EnterpriseLibrary.Common", + "version": null, + "qualifiers": null, + "subpath": null, + "is_invalid": true + }, + { + "description": "a type is always required", + "purl": "pkg:EnterpriseLibrary.Common@6.0.1304", + "canonical_purl": "pkg:EnterpriseLibrary.Common@6.0.1304", + "type": null, + "namespace": null, + "name": "EnterpriseLibrary.Common", + "version": null, + "qualifiers": null, + "subpath": null, + "is_invalid": true + }, + { + "description": "a name is required", + "purl": "pkg:maven/@1.3.4", + "canonical_purl": "pkg:maven/@1.3.4", + "type": "maven", + "namespace": null, + "name": null, + "version": null, + "qualifiers": null, + "subpath": null, + "is_invalid": true + }, + { + "description": "slash / after scheme 
is not significant", + "purl": "pkg:/maven/org.apache.commons/io", + "canonical_purl": "pkg:maven/org.apache.commons/io", + "type": "maven", + "namespace": "org.apache.commons", + "name": "io", + "version": null, + "qualifiers": null, + "subpath": null, + "is_invalid": false + }, + { + "description": "double slash // after scheme is not significant", + "purl": "pkg://maven/org.apache.commons/io", + "canonical_purl": "pkg:maven/org.apache.commons/io", + "type": "maven", + "namespace": "org.apache.commons", + "name": "io", + "version": null, + "qualifiers": null, + "subpath": null, + "is_invalid": false + }, + { + "description": "slash /// after type is not significant", + "purl": "pkg:///maven/org.apache.commons/io", + "canonical_purl": "pkg:maven/org.apache.commons/io", + "type": "maven", + "namespace": "org.apache.commons", + "name": "io", + "version": null, + "qualifiers": null, + "subpath": null, + "is_invalid": false + }, + { + "description": "valid maven purl with case sensitive namespace and name", + "purl": "pkg:maven/HTTPClient/HTTPClient@0.3-3", + "canonical_purl": "pkg:maven/HTTPClient/HTTPClient@0.3-3", + "type": "maven", + "namespace": "HTTPClient", + "name": "HTTPClient", + "version": "0.3-3", + "qualifiers": null, + "subpath": null, + "is_invalid": false + }, + { + "description": "valid maven purl containing a space in the version and qualifier", + "purl": "pkg:maven/mygroup/myartifact@1.0.0%20Final?mykey=my%20value", + "canonical_purl": "pkg:maven/mygroup/myartifact@1.0.0%20Final?mykey=my%20value", + "type": "maven", + "namespace": "mygroup", + "name": "myartifact", + "version": "1.0.0 Final", + "qualifiers": {"mykey": "my value"}, + "subpath": null, + "is_invalid": false + }, + { + "description": "checks for invalid qualifier keys", + "purl": "pkg:npm/myartifact@1.0.0?in%20production=true", + "canonical_purl": null, + "type": "npm", + "namespace": null, + "name": "myartifact", + "version": "1.0.0", + "qualifiers": {"in production": "true"}, + 
"subpath": null, + "is_invalid": true + }, + { + "description": "valid conan purl", + "purl": "pkg:conan/cctz@2.3", + "canonical_purl": "pkg:conan/cctz@2.3", + "type": "conan", + "namespace": null, + "name": "cctz", + "version": "2.3", + "qualifiers": null, + "subpath": null, + "is_invalid": false + }, + { + "description": "valid conan purl with namespace and qualifier channel", + "purl": "pkg:conan/bincrafters/cctz@2.3?channel=stable", + "canonical_purl": "pkg:conan/bincrafters/cctz@2.3?channel=stable", + "type": "conan", + "namespace": "bincrafters", + "name": "cctz", + "version": "2.3", + "qualifiers": {"channel": "stable"}, + "subpath": null, + "is_invalid": false + }, + { + "description": "invalid conan purl only namespace", + "purl": "pkg:conan/bincrafters/cctz@2.3", + "canonical_purl": "pkg:conan/bincrafters/cctz@2.3", + "type": "conan", + "namespace": "bincrafters", + "name": "cctz", + "version": "2.3", + "qualifiers": null, + "subpath": null, + "is_invalid": true + }, + { + "description": "invalid conan purl only channel qualifier", + "purl": "pkg:conan/cctz@2.3?channel=stable", + "canonical_purl": "pkg:conan/cctz@2.3?channel=stable", + "type": "conan", + "namespace": null, + "name": "cctz", + "version": "2.3", + "qualifiers": {"channel": "stable"}, + "subpath": null, + "is_invalid": true + }, + { + "description": "valid conda purl with qualifiers", + "purl": "pkg:conda/absl-py@0.4.1?build=py36h06a4308_0&channel=main&subdir=linux-64&type=tar.bz2", + "canonical_purl": "pkg:conda/absl-py@0.4.1?build=py36h06a4308_0&channel=main&subdir=linux-64&type=tar.bz2", + "type": "conda", + "namespace": null, + "name": "absl-py", + "version": "0.4.1", + "qualifiers": {"build": "py36h06a4308_0", "channel": "main", "subdir": "linux-64", "type": "tar.bz2"}, + "subpath": null, + "is_invalid": false + }, + { + "description": "valid cran purl", + "purl": "pkg:cran/A3@0.9.1", + "canonical_purl": "pkg:cran/A3@0.9.1", + "type": "cran", + "namespace": null, + "name": "A3", + 
"version": "0.9.1", + "qualifiers": null, + "subpath": null, + "is_invalid": false + }, + { + "description": "invalid cran purl without name", + "purl": "pkg:cran/@0.9.1", + "canonical_purl": "pkg:cran/@0.9.1", + "type": "cran", + "namespace": null, + "name": null, + "version": "0.9.1", + "qualifiers": null, + "subpath": null, + "is_invalid": true + }, + { + "description": "invalid cran purl without version", + "purl": "pkg:cran/A3", + "canonical_purl": "pkg:cran/A3", + "type": "cran", + "namespace": null, + "name": "A3", + "version": null, + "qualifiers": null, + "subpath": null, + "is_invalid": true + }, + { + "description": "valid swift purl", + "purl": "pkg:swift/github.com/Alamofire/Alamofire@5.4.3", + "canonical_purl": "pkg:swift/github.com/Alamofire/Alamofire@5.4.3", + "type": "swift", + "namespace": "github.com/Alamofire", + "name": "Alamofire", + "version": "5.4.3", + "qualifiers": null, + "subpath": null, + "is_invalid": false + }, + { + "description": "invalid swift purl without namespace", + "purl": "pkg:swift/Alamofire@5.4.3", + "canonical_purl": "pkg:swift/Alamofire@5.4.3", + "type": "swift", + "namespace": null, + "name": "Alamofire", + "version": "5.4.3", + "qualifiers": null, + "subpath": null, + "is_invalid": true + }, + { + "description": "invalid swift purl without name", + "purl": "pkg:swift/github.com/Alamofire/@5.4.3", + "canonical_purl": "pkg:swift/github.com/Alamofire/@5.4.3", + "type": "swift", + "namespace": "github.com/Alamofire", + "name": null, + "version": "5.4.3", + "qualifiers": null, + "subpath": null, + "is_invalid": true + }, + { + "description": "invalid swift purl without version", + "purl": "pkg:swift/github.com/Alamofire/Alamofire", + "canonical_purl": "pkg:swift/github.com/Alamofire/Alamofire", + "type": "swift", + "namespace": "github.com/Alamofire", + "name": "Alamofire", + "version": null, + "qualifiers": null, + "subpath": null, + "is_invalid": true + }, + { + "description": "valid hackage purl", + "purl": 
"pkg:hackage/AC-HalfInteger@1.2.1", + "canonical_purl": "pkg:hackage/AC-HalfInteger@1.2.1", + "type": "hackage", + "namespace": null, + "name": "AC-HalfInteger", + "version": "1.2.1", + "qualifiers": null, + "subpath": null, + "is_invalid": false + }, + { + "description": "name and version are always required", + "purl": "pkg:hackage", + "canonical_purl": "pkg:hackage", + "type": "hackage", + "namespace": null, + "name": null, + "version": null, + "qualifiers": null, + "subpath": null, + "is_invalid": true + }, + { + "description": "minimal Hugging Face model", + "purl": "pkg:huggingface/distilbert-base-uncased@043235d6088ecd3dd5fb5ca3592b6913fd516027", + "canonical_purl": "pkg:huggingface/distilbert-base-uncased@043235d6088ecd3dd5fb5ca3592b6913fd516027", + "type": "huggingface", + "namespace": null, + "name": "distilbert-base-uncased", + "version": "043235d6088ecd3dd5fb5ca3592b6913fd516027", + "qualifiers": null, + "subpath": null, + "is_invalid": false + }, + { + "description": "Hugging Face model with staging endpoint", + "purl": "pkg:huggingface/microsoft/deberta-v3-base@559062ad13d311b87b2c455e67dcd5f1c8f65111?repository_url=https://hub-ci.huggingface.co", + "canonical_purl": "pkg:huggingface/microsoft/deberta-v3-base@559062ad13d311b87b2c455e67dcd5f1c8f65111?repository_url=https://hub-ci.huggingface.co", + "type": "huggingface", + "namespace": "microsoft", + "name": "deberta-v3-base", + "version": "559062ad13d311b87b2c455e67dcd5f1c8f65111", + "qualifiers": {"repository_url": "https://hub-ci.huggingface.co"}, + "subpath": null, + "is_invalid": false + }, + { + "description": "Hugging Face model with various cases", + "purl": "pkg:huggingface/EleutherAI/gpt-neo-1.3B@797174552AE47F449AB70B684CABCB6603E5E85E", + "canonical_purl": "pkg:huggingface/EleutherAI/gpt-neo-1.3B@797174552ae47f449ab70b684cabcb6603e5e85e", + "type": "huggingface", + "namespace": "EleutherAI", + "name": "gpt-neo-1.3B", + "version": "797174552ae47f449ab70b684cabcb6603e5e85e", + "qualifiers": 
null, + "subpath": null, + "is_invalid": false + }, + { + "description": "MLflow model tracked in Azure Databricks (case insensitive)", + "purl": "pkg:mlflow/CreditFraud@3?repository_url=https://adb-5245952564735461.0.azuredatabricks.net/api/2.0/mlflow", + "canonical_purl": "pkg:mlflow/creditfraud@3?repository_url=https://adb-5245952564735461.0.azuredatabricks.net/api/2.0/mlflow", + "type": "mlflow", + "namespace": null, + "name": "creditfraud", + "version": "3", + "qualifiers": {"repository_url": "https://adb-5245952564735461.0.azuredatabricks.net/api/2.0/mlflow"}, + "subpath": null, + "is_invalid": false + }, + { + "description": "MLflow model tracked in Azure ML (case sensitive)", + "purl": "pkg:mlflow/CreditFraud@3?repository_url=https://westus2.api.azureml.ms/mlflow/v1.0/subscriptions/a50f2011-fab8-4164-af23-c62881ef8c95/resourceGroups/TestResourceGroup/providers/Microsoft.MachineLearningServices/workspaces/TestWorkspace", + "canonical_purl": "pkg:mlflow/CreditFraud@3?repository_url=https://westus2.api.azureml.ms/mlflow/v1.0/subscriptions/a50f2011-fab8-4164-af23-c62881ef8c95/resourceGroups/TestResourceGroup/providers/Microsoft.MachineLearningServices/workspaces/TestWorkspace", + "type": "mlflow", + "namespace": null, + "name": "CreditFraud", + "version": "3", + "qualifiers": {"repository_url": "https://westus2.api.azureml.ms/mlflow/v1.0/subscriptions/a50f2011-fab8-4164-af23-c62881ef8c95/resourceGroups/TestResourceGroup/providers/Microsoft.MachineLearningServices/workspaces/TestWorkspace"}, + "subpath": null, + "is_invalid": false + }, + { + "description": "MLflow model with unique identifiers", + "purl": "pkg:mlflow/trafficsigns@10?model_uuid=36233173b22f4c89b451f1228d700d49&run_id=410a3121-2709-4f88-98dd-dba0ef056b0a&repository_url=https://adb-5245952564735461.0.azuredatabricks.net/api/2.0/mlflow", + "canonical_purl": 
"pkg:mlflow/trafficsigns@10?model_uuid=36233173b22f4c89b451f1228d700d49&repository_url=https://adb-5245952564735461.0.azuredatabricks.net/api/2.0/mlflow&run_id=410a3121-2709-4f88-98dd-dba0ef056b0a", + "type": "mlflow", + "namespace": null, + "name": "trafficsigns", + "version": "10", + "qualifiers": {"model_uuid": "36233173b22f4c89b451f1228d700d49", "run_id": "410a3121-2709-4f88-98dd-dba0ef056b0a", "repository_url": "https://adb-5245952564735461.0.azuredatabricks.net/api/2.0/mlflow"}, + "subpath": null, + "is_invalid": false + }, + { + "description": "composer names are not case sensitive", + "purl": "pkg:composer/Laravel/Laravel@5.5.0", + "canonical_purl": "pkg:composer/laravel/laravel@5.5.0", + "type": "composer", + "namespace": "laravel", + "name": "laravel", + "version": "5.5.0", + "qualifiers": null, + "subpath": null, + "is_invalid": false + } +]
diff --git a/xtask/src/lib.rs b/xtask/src/lib.rs
new file mode 100644
index 0000000..be56ed3
--- /dev/null
+++ b/xtask/src/lib.rs
@@ -0,0 +1 @@
+#![cfg(not(tarpaulin_include))]
diff --git a/xtask/src/main.rs b/xtask/src/main.rs
new file mode 100644
index 0000000..7dfa216
--- /dev/null
+++ b/xtask/src/main.rs
@@ -0,0 +1,90 @@
+use std::env;
+use std::ffi::OsString;
+use std::path::{Path, PathBuf};
+use std::process::{self, Command};
+
+use clap::{Parser, Subcommand};
+
+mod generate_tests;
+
+// Top-level command-line arguments, parsed by clap.
+#[derive(Parser)]
+struct Args {
+    /// The command to run.
+    #[command(subcommand)]
+    task: Task,
+}
+
+// Tasks selectable as subcommands. Plain `//` comments are used for the notes added here
+// because clap turns `///` doc comments into help text.
+#[derive(Subcommand)]
+enum Task {
+    /// Generate integration test suite.
+    Codegen,
+    /// Generate docs.
+    #[command(disable_help_flag = true)]
+    Doc {
+        // Trailing arguments (hyphenated values allowed), forwarded to `cargo doc`.
+        #[arg(allow_hyphen_values = true, trailing_var_arg = true)]
+        rest: Vec<OsString>,
+    },
+    /// Format code.
+    #[command(disable_help_flag = true)]
+    Fmt {
+        // Trailing arguments (hyphenated values allowed), forwarded to `cargo fmt`.
+        #[arg(allow_hyphen_values = true, trailing_var_arg = true)]
+        rest: Vec<OsString>,
+    },
+}
+
+/// Runs `command` and waits for it to finish.
+///
+/// Exits this process with status 1 if the command cannot be started or exits unsuccessfully.
+fn run_command(command: &mut Command) {
+    match command.status() {
+        Ok(status) if status.success() => {},
+        Ok(_) => process::exit(1),
+        Err(err) => {
+            // Report the failure instead of panicking; a missing `cargo` is an environment
+            // problem, not a bug in this tool.
+            eprintln!("failed to run {command:?}: {err}");
+            process::exit(1)
+        },
+    }
+}
+
+/// Parses the command line and runs the selected task.
+fn main() {
+    let args = Args::parse();
+
+    match args.task {
+        Task::Codegen => {
+            generate_tests::main();
+            // Reformat the test code now.
+            run_command(Command::new("cargo").args(["+nightly", "fmt", "-p", "purl_test"]));
+        },
+        Task::Doc { rest } => {
+            run_command(
+                Command::new("cargo")
+                    .env(
+                        "RUSTDOCFLAGS",
+                        // Keep any RUSTDOCFLAGS the caller set, after the docsrs cfg.
+                        format!("--cfg docsrs {}", env::var("RUSTDOCFLAGS").unwrap_or_default()),
+                    )
+                    .args(["+nightly", "doc", "--all-features"])
+                    .args(rest),
+            );
+        },
+        Task::Fmt { rest } => {
+            run_command(Command::new("cargo").args(["+nightly", "fmt"]).args(rest));
+        },
+    }
+}
+
+/// Returns the parent of this crate's manifest directory (presumably the workspace root).
+fn workspace_dir() -> PathBuf {
+    Path::new(env!("CARGO_MANIFEST_DIR"))
+        .parent()
+        .expect("CARGO_MANIFEST_DIR should have a parent directory")
+        .to_owned()
+}