diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 1cfc22ead..bba66c6d3 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -7,11 +7,6 @@ updates: schedule: interval: monthly - - package-ecosystem: docker - directory: / - schedule: - interval: monthly - - package-ecosystem: cargo directory: /benchmarks/competitors/servo-url schedule: @@ -20,4 +15,4 @@ updates: - package-ecosystem: pip directory: /tools/release schedule: - interval: monthly \ No newline at end of file + interval: monthly diff --git a/.github/workflows/aarch64.yml b/.github/workflows/aarch64.yml index 023e2888f..85a67ceb5 100644 --- a/.github/workflows/aarch64.yml +++ b/.github/workflows/aarch64.yml @@ -24,8 +24,8 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - - uses: uraimo/run-on-arch-action@b0ffb25eb00af00468375982384441f063da1741 # v2.7.2 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: uraimo/run-on-arch-action@5397f9e30a9b62422f302092631c99ae1effcd9e # v2.8.1 name: Build and Test id: runcmd with: @@ -38,6 +38,6 @@ jobs: ln -s -f /usr/bin/gcc-12 /usr/bin/gcc ln -s -f /usr/bin/g++-12 /usr/bin/g++ run: | - cmake -DCMAKE_CXX_STANDARD=20 -B build + cmake -DCMAKE_CXX_STANDARD=20 -D ADA_TESTING=ON -B build cmake --build build ctest --test-dir build diff --git a/.github/workflows/alpine.yml b/.github/workflows/alpine.yml index 058b74e3e..e54a7bf2e 100644 --- a/.github/workflows/alpine.yml +++ b/.github/workflows/alpine.yml @@ -24,7 +24,7 @@ jobs: ubuntu-build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: start docker run: | docker run -w /src -dit --name alpine -v $PWD:/src alpine:latest @@ -36,7 +36,7 @@ jobs: ./alpine.sh apk add build-base cmake g++ linux-headers git bash icu-dev - name: cmake run: | - 
./alpine.sh cmake -DADA_BENCHMARKS=ON -B build_for_alpine + ./alpine.sh cmake -D ADA_TESTING=ON -DADA_BENCHMARKS=ON -B build_for_alpine - name: build run: | ./alpine.sh cmake --build build_for_alpine diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml index 7f1f1f02d..f729d9a30 100644 --- a/.github/workflows/cifuzz.yml +++ b/.github/workflows/cifuzz.yml @@ -34,7 +34,7 @@ jobs: fuzz-seconds: 600 sanitizer: ${{ matrix.sanitizer }} - name: Upload Crash - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: steps.build.outcome == 'success' with: name: ${{ matrix.sanitizer }}-artifacts diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index a993f2940..972df4ecc 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -28,20 +28,20 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v2.2.5 + uses: github/codeql-action/init@4f3212b61783c3c68e8309a0f18a699764811cda # v3.27.1 with: languages: ${{ matrix.language }} # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). 
# If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v2.2.5 + uses: github/codeql-action/autobuild@4f3212b61783c3c68e8309a0f18a699764811cda # v3.27.1 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v2.2.5 + uses: github/codeql-action/analyze@4f3212b61783c3c68e8309a0f18a699764811cda # v3.27.1 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index 2c56af54d..cb4ef1227 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -10,6 +10,6 @@ jobs: runs-on: ubuntu-latest steps: - name: 'Checkout Repository' - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: 'Dependency Review' - uses: actions/dependency-review-action@5a2ce3f5b92ee19cbb1541a4984c76d921601d7c # v4.3.4 + uses: actions/dependency-review-action@4081bf99e2866ebe428fc0477b69eb4fcda7220a # v4.4.0 diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 87458661e..d0028c842 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -22,7 +22,7 @@ jobs: id-token: write runs-on: ubuntu-latest steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Install theme run: ./tools/prepare-doxygen.sh - uses: mattnotmitt/doxygen-action@e0c8cd4cd05e28b88e723b25b30188ecf2505b40 # edge diff --git a/.github/workflows/emscripten.yml b/.github/workflows/emscripten.yml index 92471dfee..601f37bbc 100644 --- a/.github/workflows/emscripten.yml +++ b/.github/workflows/emscripten.yml @@ -24,15 +24,15 @@ jobs: build: 
runs-on: ubuntu-latest steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - - uses: actions/setup-node@1e60f620b9541d16bece96c5465dc8ee9832be0b # v4.0.3 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 - uses: mymindstorm/setup-emsdk@6ab9eb1bda2574c4ddb79809fc9247783eaf9021 # v14 - name: Verify run: emcc -v - name: Checkout - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v3.6.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Configure - run: emcmake cmake -B buildwasm -D ADA_TOOLS=OFF + run: emcmake cmake -B buildwasm -D ADA_TESTING=ON -D ADA_TOOLS=OFF - name: Build run: cmake --build buildwasm - name: Test diff --git a/.github/workflows/lint_and_format_check.yml b/.github/workflows/lint_and_format_check.yml index 7077ec1f6..9d4f3113d 100644 --- a/.github/workflows/lint_and_format_check.yml +++ b/.github/workflows/lint_and_format_check.yml @@ -24,7 +24,7 @@ jobs: lint-and-format: runs-on: ubuntu-latest steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Run clang-format uses: jidicula/clang-format-action@c74383674bf5f7c69f60ce562019c1c94bc1421a # v4.13.0 diff --git a/.github/workflows/macos_install.yml b/.github/workflows/macos_install.yml index 76199e791..3f9570eec 100644 --- a/.github/workflows/macos_install.yml +++ b/.github/workflows/macos_install.yml @@ -28,9 +28,9 @@ jobs: include: shared: [ON, OFF] steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Prepare - run: cmake -DBUILD_SHARED_LIBS=${{matrix.shared}} -DCMAKE_INSTALL_PREFIX:PATH=destination -B build + run: cmake -D ADA_TESTING=ON -DBUILD_SHARED_LIBS=${{matrix.shared}} 
-DCMAKE_INSTALL_PREFIX:PATH=destination -B build - name: Build run: cmake --build build -j=3 - name: Install diff --git a/.github/workflows/release-script-tests.yml b/.github/workflows/release-script-tests.yml index 11279fa0c..dadeb0955 100644 --- a/.github/workflows/release-script-tests.yml +++ b/.github/workflows/release-script-tests.yml @@ -26,10 +26,10 @@ jobs: working-directory: ./tools/release steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Prepare Python - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: cache: 'pip' # caching pip dependencies diff --git a/.github/workflows/release_create.yml b/.github/workflows/release_create.yml index f5b873209..dc48e9229 100644 --- a/.github/workflows/release_create.yml +++ b/.github/workflows/release_create.yml @@ -36,10 +36,10 @@ jobs: NEXT_RELEASE_TAG: ${{ github.event.pull_request.head.ref }} steps: - name: Checkout - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Prepare Python - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: cache: 'pip' # caching pip dependencies diff --git a/.github/workflows/release_prepare.yml b/.github/workflows/release_prepare.yml index 3e9fc6a9f..b714a2203 100644 --- a/.github/workflows/release_prepare.yml +++ b/.github/workflows/release_prepare.yml @@ -26,10 +26,10 @@ jobs: env: CXX: clang++-14 steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Prepare Python - uses: 
actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 with: cache: 'pip' # caching pip dependencies diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 39433ad6e..214e4966b 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -30,7 +30,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false @@ -57,7 +57,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: name: SARIF file path: results.sarif @@ -65,6 +65,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. 
- name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6 + uses: github/codeql-action/upload-sarif@4f3212b61783c3c68e8309a0f18a699764811cda # v3.27.1 with: sarif_file: results.sarif diff --git a/.github/workflows/ubuntu-release.yml b/.github/workflows/ubuntu-release.yml index 7e835d47e..185d7076f 100644 --- a/.github/workflows/ubuntu-release.yml +++ b/.github/workflows/ubuntu-release.yml @@ -27,7 +27,7 @@ jobs: matrix: cxx: [g++-12, clang++-14] steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Ninja run: sudo apt-get install ninja-build - name: Prepare diff --git a/.github/workflows/ubuntu-s390x.yml b/.github/workflows/ubuntu-s390x.yml index 2c1a32506..6a38470cf 100644 --- a/.github/workflows/ubuntu-s390x.yml +++ b/.github/workflows/ubuntu-s390x.yml @@ -24,8 +24,8 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - - uses: uraimo/run-on-arch-action@b0ffb25eb00af00468375982384441f063da1741 # v2.7.2 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: uraimo/run-on-arch-action@5397f9e30a9b62422f302092631c99ae1effcd9e # v2.8.1 name: Test id: runcmd with: @@ -36,7 +36,7 @@ jobs: apt-get update -q -y apt-get install -y cmake make g++ git ninja-build run: | - cmake -DCMAKE_BUILD_TYPE=Release -G Ninja -B build + cmake -D ADA_TESTING=ON -DCMAKE_BUILD_TYPE=Release -G Ninja -B build rm -r -f dependencies cmake --build build -j=4 ctest --output-on-failure --test-dir build diff --git a/.github/workflows/ubuntu-sanitized.yml b/.github/workflows/ubuntu-sanitized.yml index 724a64593..8716f05de 100644 --- a/.github/workflows/ubuntu-sanitized.yml +++ b/.github/workflows/ubuntu-sanitized.yml @@ -27,11 +27,11 @@ jobs: matrix: shared: [ON, OFF] steps: - - uses: 
actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Ninja run: sudo apt-get install ninja-build - name: Prepare - run: cmake -DADA_SANITIZE=ON -DADA_DEVELOPMENT_CHECKS=ON -DBUILD_SHARED_LIBS=${{matrix.shared}} -G Ninja -B build + run: cmake -D ADA_TESTING=ON -DADA_SANITIZE=ON -DADA_DEVELOPMENT_CHECKS=ON -DBUILD_SHARED_LIBS=${{matrix.shared}} -G Ninja -B build env: CXX: g++-12 - name: Build diff --git a/.github/workflows/ubuntu-undef.yml b/.github/workflows/ubuntu-undef.yml index 66c472116..e8bc1a126 100644 --- a/.github/workflows/ubuntu-undef.yml +++ b/.github/workflows/ubuntu-undef.yml @@ -27,11 +27,11 @@ jobs: matrix: shared: [ON, OFF] steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Ninja run: sudo apt-get install ninja-build - name: Prepare - run: cmake -D ADA_SANITIZE_UNDEFINED=ON -DADA_DEVELOPMENT_CHECKS=ON -DBUILD_SHARED_LIBS=${{matrix.shared}} -G Ninja -B build + run: cmake -D ADA_TESTING=ON -D ADA_SANITIZE_UNDEFINED=ON -DADA_DEVELOPMENT_CHECKS=ON -DBUILD_SHARED_LIBS=${{matrix.shared}} -G Ninja -B build env: CXX: g++-12 - name: Build diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 1bfa8adc0..c20fdc5e1 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -28,11 +28,11 @@ jobs: shared: [ON, OFF] cxx: [g++-12, clang++-14] steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Ninja run: sudo apt-get install ninja-build - name: Prepare - run: cmake -D ADA_BENCHMARKS=ON -DBUILD_SHARED_LIBS=${{matrix.shared}} -G Ninja -B build + run: cmake -D ADA_TESTING=ON -D ADA_BENCHMARKS=ON -DBUILD_SHARED_LIBS=${{matrix.shared}} -G Ninja -B build env: CXX: ${{matrix.cxx}} - 
name: Build diff --git a/.github/workflows/ubuntu_install.yml b/.github/workflows/ubuntu_install.yml index 8579ac7e0..43fb0619c 100644 --- a/.github/workflows/ubuntu_install.yml +++ b/.github/workflows/ubuntu_install.yml @@ -24,11 +24,11 @@ jobs: ubuntu-build: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Ninja run: sudo apt-get install ninja-build - name: Prepare - run: cmake -G Ninja -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX:PATH=destination -B build + run: cmake -D ADA_TESTING=ON -G Ninja -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX:PATH=destination -B build - name: Build run: cmake --build build -j=4 - name: Install diff --git a/.github/workflows/ubuntu_pedantic.yml b/.github/workflows/ubuntu_pedantic.yml index 48c68b459..ad77a7b04 100644 --- a/.github/workflows/ubuntu_pedantic.yml +++ b/.github/workflows/ubuntu_pedantic.yml @@ -27,11 +27,11 @@ jobs: matrix: shared: [ON, OFF] steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Setup Ninja run: sudo apt-get install ninja-build - name: Prepare - run: cmake -DBUILD_SHARED_LIBS=${{matrix.shared}} -G Ninja -B build + run: cmake -D ADA_TESTING=ON -DBUILD_SHARED_LIBS=${{matrix.shared}} -G Ninja -B build env: CXX: g++-12 CXXFLAGS: -Werror diff --git a/.github/workflows/visual_studio.yml b/.github/workflows/visual_studio.yml index 76230d21c..7ce4ce3c2 100644 --- a/.github/workflows/visual_studio.yml +++ b/.github/workflows/visual_studio.yml @@ -34,10 +34,10 @@ jobs: - {gen: Visual Studio 17 2022, arch: Win32, devchecks: ON, shared: OFF, config: Debug} - {gen: Visual Studio 17 2022, arch: Win32, devchecks: ON, shared: ON, config: Debug} steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Configure run: | - cmake -DADA_DEVELOPMENT_CHECKS="${{matrix.devchecks}}" -G "${{matrix.gen}}" -A ${{matrix.arch}} -DBUILD_SHARED_LIBS=${{matrix.shared}} -B build + cmake -D ADA_TESTING=ON -DADA_DEVELOPMENT_CHECKS="${{matrix.devchecks}}" -G "${{matrix.gen}}" -A ${{matrix.arch}} -DBUILD_SHARED_LIBS=${{matrix.shared}} -B build - name: Build run: cmake --build build --config "${{matrix.config}}" --verbose - name: Run tests diff --git a/.github/workflows/visual_studio_clang.yml b/.github/workflows/visual_studio_clang.yml index 349bc8c33..5c2b71aac 100644 --- a/.github/workflows/visual_studio_clang.yml +++ b/.github/workflows/visual_studio_clang.yml @@ -30,10 +30,10 @@ jobs: include: - {gen: Visual Studio 17 2022, arch: x64, devchecks: ON} steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Configure run: | - cmake -DADA_DEVELOPMENT_CHECKS="${{matrix.devchecks}}" -G "${{matrix.gen}}" -A ${{matrix.arch}} -T ClangCL -B build + cmake -D ADA_TESTING=ON -DADA_DEVELOPMENT_CHECKS="${{matrix.devchecks}}" -G "${{matrix.gen}}" -A ${{matrix.arch}} -T ClangCL -B build - name: Build Debug run: cmake --build build --config Debug --verbose - name: Run Debug tests diff --git a/.github/workflows/wpt-updater.yml b/.github/workflows/wpt-updater.yml index 103fd1719..2694d25ff 100644 --- a/.github/workflows/wpt-updater.yml +++ b/.github/workflows/wpt-updater.yml @@ -21,7 +21,7 @@ jobs: contents: write pull-requests: write steps: - - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Fetch tests run: tools/update-wpt.sh - name: Open pull request diff --git a/CMakeLists.txt b/CMakeLists.txt index a4cbb576e..7b2c0850b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,7 +20,7 @@ 
add_subdirectory(src) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/scripts/cmake) option(ADA_BENCHMARKS "Build benchmarks" OFF) -option(ADA_TESTING "Build tests" ${BUILD_TESTING}) +option(ADA_TESTING "Build tests" OFF) # There are cases where when embedding ada as a dependency for other CMake # projects as submodules or subdirectories (via FetchContent) can lead to diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 9882798ed..000000000 --- a/Dockerfile +++ /dev/null @@ -1,12 +0,0 @@ -FROM debian:12-slim@sha256:2ccc7e39b0a6f504d252f807da1fc4b5bcd838e83e4dec3e2f57b2a4a64e7214 - -RUN apt-get update && apt-get install -y \ - apt-transport-https \ - gcc \ - clang \ - clang-tools \ - cmake - -WORKDIR /repo - -CMD ["bash", "-c", "cmake -B build && cmake --build build && cd build && ctest --output-on-failure"] diff --git a/README.md b/README.md index 5404b0d57..839b8e854 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ Linux or macOS users might follow the following instructions if they have a rece #include int main(int, char *[]) { - auto url = ada::parse("https://www.google.com"); + auto url = ada::parse("https://www.google.com"); if (!url) { std::cout << "failure" << std::endl; return EXIT_FAILURE; diff --git a/benchmarks/competitors/servo-url/Cargo.lock b/benchmarks/competitors/servo-url/Cargo.lock index 141f37877..d8e818657 100644 --- a/benchmarks/competitors/servo-url/Cargo.lock +++ b/benchmarks/competitors/servo-url/Cargo.lock @@ -2,6 +2,17 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -11,14 +22,143 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "idna" -version = "0.5.0" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", ] 
[[package]] @@ -27,12 +167,56 @@ version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +[[package]] +name = "litemap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + [[package]] name = "percent-encoding" version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "proc-macro2" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "serde" +version = "1.0.214" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.214" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "servo-url" version = "0.1.0" @@ -42,42 +226,153 @@ dependencies = [ ] [[package]] -name = "tinyvec" -version = "1.6.0" +name = "smallvec" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" -dependencies = [ - "tinyvec_macros", -] +checksum = 
"3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] -name = "tinyvec_macros" -version = "0.1.1" +name = "stable_deref_trait" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] -name = "unicode-bidi" -version = "0.3.10" +name = "syn" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] [[package]] -name = "unicode-normalization" -version = "0.1.22" +name = "synstructure" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ - "tinyvec", + "proc-macro2", + "quote", + "syn", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + [[package]] name = "url" -version = "2.5.2" +version = "2.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" dependencies = [ "form_urlencoded", "idna", "percent-encoding", ] + +[[package]] +name = 
"utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + +[[package]] +name = "yoke" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/benchmarks/competitors/servo-url/Cargo.toml b/benchmarks/competitors/servo-url/Cargo.toml index 17aacb42c..7f2069c0e 100644 --- a/benchmarks/competitors/servo-url/Cargo.toml +++ b/benchmarks/competitors/servo-url/Cargo.toml @@ -7,7 +7,7 @@ path = "lib.rs" crate-type = ["cdylib"] [dependencies] -url = "2.5.2" +url = "2.5.3" libc = "0.2" [profile.release] diff --git a/clang-format-ignore.txt b/clang-format-ignore.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/cmake/ada-flags.cmake b/cmake/ada-flags.cmake index f4bb9dd04..c351a7a94 100644 --- a/cmake/ada-flags.cmake +++ b/cmake/ada-flags.cmake @@ -15,7 +15,7 @@ if(ADA_SANITIZE_UNDEFINED) message(STATUS "Undefined sanitizer enabled.") endif() option(ADA_COVERAGE "Compute coverage" OFF) -option(ADA_TOOLS "Build cli tools (adaparse)" ON) +option(ADA_TOOLS "Build cli tools (adaparse)" OFF) if (ADA_COVERAGE) message(STATUS "You want to compute coverage. We assume that you have installed gcovr.") diff --git a/include/ada/checkers.h b/include/ada/checkers.h index 20bd3df52..6b50915ef 100644 --- a/include/ada/checkers.h +++ b/include/ada/checkers.h @@ -85,7 +85,7 @@ inline constexpr bool is_normalized_windows_drive_letter( * does not contain uppercase ASCII characters (the input should have been * lowered cased before calling this function) and is not empty. 
*/ -ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept; +ada_really_inline constexpr bool is_ipv4(std::string_view view) noexcept; /** * @private diff --git a/include/ada/common_defs.h b/include/ada/common_defs.h index 4e8b3b95e..35f86d3b1 100644 --- a/include/ada/common_defs.h +++ b/include/ada/common_defs.h @@ -173,18 +173,6 @@ namespace ada { } } // namespace ada -#if defined(__GNUC__) && !defined(__clang__) -#if __GNUC__ <= 8 -#define ADA_OLD_GCC 1 -#endif // __GNUC__ <= 8 -#endif // defined(__GNUC__) && !defined(__clang__) - -#if ADA_OLD_GCC -#define ada_constexpr -#else -#define ada_constexpr constexpr -#endif - // Unless the programmer has already set ADA_DEVELOPMENT_CHECKS, // we want to set it under debug builds. We detect a debug build // under Visual Studio when the _DEBUG macro is set. Under the other diff --git a/include/ada/helpers.h b/include/ada/helpers.h index b9a553c02..d20473f0c 100644 --- a/include/ada/helpers.h +++ b/include/ada/helpers.h @@ -12,6 +12,10 @@ #include #include +#if ADA_DEVELOPMENT_CHECKS +#include +#endif // ADA_DEVELOPMENT_CHECKS + /** * These functions are not part of our public API and may * change at any time. @@ -128,7 +132,7 @@ ada_really_inline void resize(std::string_view& input, size_t pos) noexcept; * and whether a colon was found outside brackets. Used by the host parser. */ ada_really_inline std::pair get_host_delimiter_location( - const bool is_special, std::string_view& view) noexcept; + bool is_special, std::string_view& view) noexcept; /** * @private diff --git a/include/ada/log.h b/include/ada/log.h index 5b00d360c..c05526ae8 100644 --- a/include/ada/log.h +++ b/include/ada/log.h @@ -7,65 +7,31 @@ #define ADA_LOG_H #include "ada/common_defs.h" -#include // To enable logging, set ADA_LOGGING to 1: #ifndef ADA_LOGGING #define ADA_LOGGING 0 #endif -namespace ada { - -/** - * Private function used for logging messages. 
- * @private - */ -template -ada_really_inline void inner_log([[maybe_unused]] T t) { -#if ADA_LOGGING - std::cout << t << std::endl; -#endif -} - -/** - * Private function used for logging messages. - * @private - */ -template -ada_really_inline void inner_log([[maybe_unused]] T t, - [[maybe_unused]] Args... args) { #if ADA_LOGGING - std::cout << t; - inner_log(args...); -#endif -} +#include +#endif // ADA_LOGGING -/** - * Log a message. - * @private - */ -template -ada_really_inline void log([[maybe_unused]] T t, - [[maybe_unused]] Args... args) { -#if ADA_LOGGING - std::cout << "ADA_LOG: " << t; - inner_log(args...); -#endif -} +namespace ada { /** - * Log a message. + * Log a message. If you want to have no overhead when logging is disabled, use + * the ada_log macro. * @private */ -template -ada_really_inline void log([[maybe_unused]] T t) { +template +constexpr ada_really_inline void log([[maybe_unused]] Args... args) { #if ADA_LOGGING - std::cout << "ADA_LOG: " << t << std::endl; -#endif + ((std::cout << "ADA_LOG: ") << ... << args) << std::endl; +#endif // ADA_LOGGING } } // namespace ada #if ADA_LOGGING - #ifndef ada_log #define ada_log(...) 
\ do { \ diff --git a/include/ada/scheme.h b/include/ada/scheme.h index f14a85e6d..b83dccb48 100644 --- a/include/ada/scheme.h +++ b/include/ada/scheme.h @@ -8,7 +8,6 @@ #include "ada/common_defs.h" #include -#include #include /** diff --git a/include/ada/serializers.h b/include/ada/serializers.h index d8e0d3d6f..260dcf3a1 100644 --- a/include/ada/serializers.h +++ b/include/ada/serializers.h @@ -8,7 +8,6 @@ #include "ada/common_defs.h" #include -#include #include /** diff --git a/include/ada/unicode-inl.h b/include/ada/unicode-inl.h index 7bbbd8fe3..772af6cf9 100644 --- a/include/ada/unicode-inl.h +++ b/include/ada/unicode-inl.h @@ -19,8 +19,7 @@ namespace ada::unicode { ada_really_inline size_t percent_encode_index(const std::string_view input, const uint8_t character_set[]) { return std::distance( - input.begin(), - std::find_if(input.begin(), input.end(), [character_set](const char c) { + input.begin(), std::ranges::find_if(input, [character_set](const char c) { return character_sets::bit_at(character_set, c); })); } diff --git a/include/ada/unicode.h b/include/ada/unicode.h index 198109819..53b484139 100644 --- a/include/ada/unicode.h +++ b/include/ada/unicode.h @@ -147,7 +147,7 @@ ada_really_inline constexpr bool is_ascii_tab_or_newline(char c) noexcept; * @details A double-dot path segment must be ".." or an ASCII case-insensitive * match for ".%2e", "%2e.", or "%2e%2e". 
*/ -ada_really_inline ada_constexpr bool is_double_dot_path_segment( +ada_really_inline constexpr bool is_double_dot_path_segment( std::string_view input) noexcept; /** diff --git a/include/ada/url-inl.h b/include/ada/url-inl.h index 863e6c5e7..f2ff0160d 100644 --- a/include/ada/url-inl.h +++ b/include/ada/url-inl.h @@ -132,8 +132,8 @@ inline void url::update_base_search(std::string_view input, query = ada::unicode::percent_encode(input, query_percent_encode_set); } -inline void url::update_base_search(std::optional input) { - query = input; +inline void url::update_base_search(std::optional &&input) { + query = std::move(input); } inline void url::update_base_pathname(const std::string_view input) { @@ -232,7 +232,7 @@ ada_really_inline size_t url::parse_port(std::string_view view, return 0; } ada_log("parse_port: ", parsed_port); - const size_t consumed = size_t(r.ptr - view.data()); + const auto consumed = size_t(r.ptr - view.data()); ada_log("parse_port: consumed ", consumed); if (check_trailing_content) { is_valid &= @@ -245,9 +245,8 @@ ada_really_inline size_t url::parse_port(std::string_view view, auto default_port = scheme_default_port(); bool is_port_valid = (default_port == 0 && parsed_port == 0) || (default_port != parsed_port); - port = (r.ec == std::errc() && is_port_valid) - ? std::optional(parsed_port) - : std::nullopt; + port = (r.ec == std::errc() && is_port_valid) ? 
std::optional(parsed_port) + : std::nullopt; } return consumed; } diff --git a/include/ada/url.h b/include/ada/url.h index 63740e6e3..bbcac47e1 100644 --- a/include/ada/url.h +++ b/include/ada/url.h @@ -302,10 +302,9 @@ struct url : url_base { inline void update_unencoded_base_hash(std::string_view input); inline void update_base_hostname(std::string_view input); - inline void update_base_search(std::string_view input); inline void update_base_search(std::string_view input, const uint8_t query_percent_encode_set[]); - inline void update_base_search(std::optional input); + inline void update_base_search(std::optional &&input); inline void update_base_pathname(std::string_view input); inline void update_base_username(std::string_view input); inline void update_base_password(std::string_view input); diff --git a/include/ada/url_aggregator-inl.h b/include/ada/url_aggregator-inl.h index 483214a75..2bca0d196 100644 --- a/include/ada/url_aggregator-inl.h +++ b/include/ada/url_aggregator-inl.h @@ -1109,6 +1109,28 @@ inline std::ostream &operator<<(std::ostream &out, const ada::url_aggregator &u) { return out << u.to_string(); } + +void url_aggregator::update_host_to_base_host( + const std::string_view input) noexcept { + ada_log("url_aggregator::update_host_to_base_host ", input); + ADA_ASSERT_TRUE(validate()); + ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer)); + if (type != ada::scheme::type::FILE) { + // Let host be the result of host parsing host_view with url is not special. 
+ if (input.empty() && !is_special()) { + if (has_hostname()) { + clear_hostname(); + } else if (has_dash_dot()) { + add_authority_slashes_if_needed(); + delete_dash_dot(); + } + return; + } + } + update_base_hostname(input); + ADA_ASSERT_TRUE(validate()); + return; +} } // namespace ada #endif // ADA_URL_AGGREGATOR_INL_H diff --git a/include/ada/url_aggregator.h b/include/ada/url_aggregator.h index 8c71a7395..dad4750c8 100644 --- a/include/ada/url_aggregator.h +++ b/include/ada/url_aggregator.h @@ -313,6 +313,8 @@ struct url_aggregator : url_base { std::string_view new_scheme_with_colon) noexcept; inline void copy_scheme(const url_aggregator &u) noexcept; + inline void update_host_to_base_host(const std::string_view input) noexcept; + }; // url_aggregator inline std::ostream &operator<<(std::ostream &out, const ada::url &u); diff --git a/include/ada/url_search_params-inl.h b/include/ada/url_search_params-inl.h index 6edb37c52..0ea2017ae 100644 --- a/include/ada/url_search_params-inl.h +++ b/include/ada/url_search_params-inl.h @@ -193,7 +193,7 @@ inline url_search_params_entries_iter url_search_params::get_entries() { } template -inline bool url_search_params_iter::has_next() { +inline bool url_search_params_iter::has_next() const { return pos < params.params.size(); } diff --git a/include/ada/url_search_params.h b/include/ada/url_search_params.h index 00052c786..84018087a 100644 --- a/include/ada/url_search_params.h +++ b/include/ada/url_search_params.h @@ -42,7 +42,9 @@ struct url_search_params { * @see * https://github.com/web-platform-tests/wpt/blob/master/url/urlsearchparams-constructor.any.js */ - url_search_params(const std::string_view input) { initialize(input); } + explicit url_search_params(const std::string_view input) { + initialize(input); + } url_search_params(const url_search_params &u) = default; url_search_params(url_search_params &&u) noexcept = default; @@ -172,7 +174,7 @@ struct url_search_params_iter { */ inline std::optional next(); - inline 
bool has_next(); + inline bool has_next() const; private: static url_search_params EMPTY; diff --git a/src/ada.cpp b/src/ada.cpp index 164f37d74..26090909f 100644 --- a/src/ada.cpp +++ b/src/ada.cpp @@ -5,9 +5,7 @@ #include "implementation.cpp" #include "helpers.cpp" #include "url.cpp" -#include "url-getters.cpp" -#include "url-setters.cpp" #include "parser.cpp" #include "url_components.cpp" #include "url_aggregator.cpp" -#include "ada_c.cpp" \ No newline at end of file +#include "ada_c.cpp" diff --git a/src/checkers.cpp b/src/checkers.cpp index ffca6ddb3..82e1fe32f 100644 --- a/src/checkers.cpp +++ b/src/checkers.cpp @@ -4,22 +4,21 @@ namespace ada::checkers { -ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept { +ada_really_inline constexpr bool is_ipv4(std::string_view view) noexcept { // The string is not empty and does not contain upper case ASCII characters. // // Optimization. To be considered as a possible ipv4, the string must end // with 'x' or a lowercase hex character. // Most of the time, this will be false so this simple check will save a lot // of effort. - char last_char = view.back(); // If the address ends with a dot, we need to prune it (special case). - if (last_char == '.') { + if (view.ends_with('.')) { view.remove_suffix(1); if (view.empty()) { return false; } - last_char = view.back(); } + char last_char = view.back(); bool possible_ipv4 = (last_char >= '0' && last_char <= '9') || (last_char >= 'a' && last_char <= 'f') || last_char == 'x'; @@ -35,7 +34,7 @@ ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept { /** Optimization opportunity: we have basically identified the last number of the ipv4 if we return true here. We might as well parse it and have at least one number parsed when we get to parse_ipv4. 
*/ - if (std::all_of(view.begin(), view.end(), ada::checkers::is_digit)) { + if (std::ranges::all_of(view, ada::checkers::is_digit)) { return true; } // It could be hex (0x), but not if there is a single character. diff --git a/src/helpers.cpp b/src/helpers.cpp index c5926cb65..b84b533ec 100644 --- a/src/helpers.cpp +++ b/src/helpers.cpp @@ -96,13 +96,11 @@ ada_really_inline std::optional prune_hash( ada_really_inline bool shorten_path(std::string& path, ada::scheme::type type) noexcept { - size_t first_delimiter = path.find_first_of('/', 1); - // Let path be url's path. // If url's scheme is "file", path's size is 1, and path[0] is a normalized // Windows drive letter, then return. if (type == ada::scheme::type::FILE && - first_delimiter == std::string_view::npos && !path.empty()) { + path.find('/', 1) == std::string_view::npos && !path.empty()) { if (checkers::is_normalized_windows_drive_letter( helpers::substring(path, 1))) { return false; @@ -121,13 +119,11 @@ ada_really_inline bool shorten_path(std::string& path, ada_really_inline bool shorten_path(std::string_view& path, ada::scheme::type type) noexcept { - size_t first_delimiter = path.find_first_of('/', 1); - // Let path be url's path. // If url's scheme is "file", path's size is 1, and path[0] is a normalized // Windows drive letter, then return. 
if (type == ada::scheme::type::FILE && - first_delimiter == std::string_view::npos && !path.empty()) { + path.find('/', 1) == std::string_view::npos && !path.empty()) { if (checkers::is_normalized_windows_drive_letter( helpers::substring(path, 1))) { return false; @@ -150,11 +146,7 @@ ada_really_inline void remove_ascii_tab_or_newline( std::string& input) noexcept { // if this ever becomes a performance issue, we could use an approach similar // to has_tabs_or_newline - input.erase(std::remove_if(input.begin(), input.end(), - [](char c) { - return ada::unicode::is_ascii_tab_or_newline(c); - }), - input.end()); + std::erase_if(input, ada::unicode::is_ascii_tab_or_newline); } ada_really_inline constexpr std::string_view substring(std::string_view input, diff --git a/src/parser.cpp b/src/parser.cpp index cbc375675..6937bab4c 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -36,7 +36,7 @@ result_type parse_url_impl(std::string_view user_input, // We refuse to parse URL strings that exceed 4GB. Such strings are almost // surely the result of a bug or are otherwise a security concern. - if (user_input.size() > std::numeric_limits::max()) { + if (user_input.size() > std::numeric_limits::max()) [[unlikely]] { url.is_valid = false; } // Going forward, user_input.size() is in [0, @@ -67,20 +67,19 @@ result_type parse_url_impl(std::string_view user_input, url.reserve(reserve_capacity); } std::string tmp_buffer; - std::string_view internal_input; - if (unicode::has_tabs_or_newline(user_input)) { + std::string_view url_data; + if (unicode::has_tabs_or_newline(user_input)) [[unlikely]] { tmp_buffer = user_input; // Optimization opportunity: Instead of copying and then pruning, we could // just directly build the string from user_input. 
helpers::remove_ascii_tab_or_newline(tmp_buffer); - internal_input = tmp_buffer; - } else { - internal_input = user_input; + url_data = tmp_buffer; + } else [[likely]] { + url_data = user_input; } // Leading and trailing control characters are uncommon and easy to deal with // (no performance concern). - std::string_view url_data = internal_input; helpers::trim_c0_whitespace(url_data); // Optimization opportunity. Most websites do not have fragment. @@ -238,10 +237,9 @@ result_type parse_url_impl(std::string_view user_input, // TODO: We could do various processing early on, using a single pass // over the string to collect information about it, e.g., telling us // whether there is a @ and if so, where (or how many). - const bool contains_ampersand = - (url_data.find('@', input_position) != std::string_view::npos); - if (!contains_ampersand) { + // Check if url data contains an @. + if (url_data.find('@', input_position) == std::string_view::npos) { state = ada::state::HOST; break; } @@ -253,12 +251,12 @@ result_type parse_url_impl(std::string_view user_input, * --------^ */ do { - std::string_view view = helpers::substring(url_data, input_position); + std::string_view view = url_data.substr(input_position); // The delimiters are @, /, ? \\. size_t location = url.is_special() ? helpers::find_authority_delimiter_special(view) : helpers::find_authority_delimiter(view); - std::string_view authority_view(view.data(), location); + std::string_view authority_view = view.substr(0, location); size_t end_of_authority = input_position + authority_view.size(); // If c is U+0040 (@), then: if ((end_of_authority != input_size) && @@ -362,8 +360,7 @@ result_type parse_url_impl(std::string_view user_input, // If c is U+002F (/) and remaining starts with U+002F (/), // then set state to special authority ignore slashes state and increase // pointer by 1. 
- std::string_view view = helpers::substring(url_data, input_position); - if (view.starts_with("//")) { + if (url_data.substr(input_position, 2) == "//") { state = ada::state::SPECIAL_AUTHORITY_IGNORE_SLASHES; input_position += 2; } else { @@ -430,9 +427,7 @@ result_type parse_url_impl(std::string_view user_input, } else { url.update_base_authority(base_url->get_href(), base_url->get_components()); - // TODO: Get rid of set_hostname and replace it with - // update_base_hostname - url.set_hostname(base_url->get_hostname()); + url.update_host_to_base_host(base_url->get_hostname()); url.update_base_port(base_url->retrieve_base_port()); // cloning the base path includes cloning the has_opaque_path flag url.has_opaque_path = base_url->has_opaque_path; @@ -458,7 +453,7 @@ result_type parse_url_impl(std::string_view user_input, } else { std::string_view path = url.get_pathname(); if (helpers::shorten_path(path, url.type)) { - url.update_base_pathname(std::string(path)); + url.update_base_pathname(std::move(std::string(path))); } } // Set state to path state and decrease pointer by 1. @@ -500,9 +495,7 @@ result_type parse_url_impl(std::string_view user_input, } else { url.update_base_authority(base_url->get_href(), base_url->get_components()); - // TODO: Get rid of set_hostname and replace it with - // update_base_hostname - url.set_hostname(base_url->get_hostname()); + url.update_host_to_base_host(base_url->get_hostname()); url.update_base_port(base_url->retrieve_base_port()); } state = ada::state::PATH; @@ -519,8 +512,7 @@ result_type parse_url_impl(std::string_view user_input, // If c is U+002F (/) and remaining starts with U+002F (/), // then set state to special authority ignore slashes state and increase // pointer by 1. 
- std::string_view view = helpers::substring(url_data, input_position); - if (view.starts_with("//")) { + if (url_data.substr(input_position, 2) == "//") { input_position += 2; } @@ -553,7 +545,7 @@ result_type parse_url_impl(std::string_view user_input, // Percent-encode after encoding, with encoding, buffer, and // queryPercentEncodeSet, and append the result to url's query. - url.update_base_search(helpers::substring(url_data, input_position), + url.update_base_search(url_data.substr(input_position), query_percent_encode_set); ada_log("QUERY update_base_search completed "); if (fragment.has_value()) { @@ -565,8 +557,7 @@ result_type parse_url_impl(std::string_view user_input, case ada::state::HOST: { ada_log("HOST ", helpers::substring(url_data, input_position)); - std::string_view host_view = - helpers::substring(url_data, input_position); + std::string_view host_view = url_data.substr(input_position); auto [location, found_colon] = helpers::get_host_delimiter_location(url.is_special(), host_view); input_position = (location != std::string_view::npos) @@ -597,7 +588,7 @@ result_type parse_url_impl(std::string_view user_input, else { // If url is special and host_view is the empty string, validation // error, return failure. - if (url.is_special() && host_view.empty()) { + if (host_view.empty() && url.is_special()) { url.is_valid = false; return url; } @@ -620,7 +611,7 @@ result_type parse_url_impl(std::string_view user_input, } case ada::state::OPAQUE_PATH: { ada_log("OPAQUE_PATH ", helpers::substring(url_data, input_position)); - std::string_view view = helpers::substring(url_data, input_position); + std::string_view view = url_data.substr(input_position); // If c is U+003F (?), then set url's query to the empty string and // state to query state. 
size_t location = view.find('?'); @@ -640,10 +631,8 @@ result_type parse_url_impl(std::string_view user_input, } case ada::state::PORT: { ada_log("PORT ", helpers::substring(url_data, input_position)); - std::string_view port_view = - helpers::substring(url_data, input_position); - size_t consumed_bytes = url.parse_port(port_view, true); - input_position += consumed_bytes; + std::string_view port_view = url_data.substr(input_position); + input_position += url.parse_port(port_view, true); if (!url.is_valid) { return url; } @@ -698,8 +687,8 @@ result_type parse_url_impl(std::string_view user_input, break; } case ada::state::PATH: { - std::string_view view = helpers::substring(url_data, input_position); ada_log("PATH ", helpers::substring(url_data, input_position)); + std::string_view view = url_data.substr(input_position); // Most time, we do not need percent encoding. // Furthermore, we can immediately locate the '?'. @@ -743,8 +732,7 @@ result_type parse_url_impl(std::string_view user_input, if constexpr (result_type_is_ada_url) { url.host = base_url->host; } else { - // TODO: Optimization opportunity. - url.set_host(base_url->get_host()); + url.update_host_to_base_host(base_url->get_host()); } // If the code point substring from pointer to the end of input does // not start with a Windows drive letter and base's path[0] is a @@ -752,7 +740,7 @@ result_type parse_url_impl(std::string_view user_input, // url's path. 
if (!base_url->get_pathname().empty()) { if (!checkers::is_windows_drive_letter( - helpers::substring(url_data, input_position))) { + url_data.substr(input_position))) { std::string_view first_base_url_path = base_url->get_pathname().substr(1); size_t loc = first_base_url_path.find('/'); @@ -780,8 +768,8 @@ result_type parse_url_impl(std::string_view user_input, break; } case ada::state::FILE_HOST: { - std::string_view view = helpers::substring(url_data, input_position); ada_log("FILE_HOST ", helpers::substring(url_data, input_position)); + std::string_view view = url_data.substr(input_position); size_t location = view.find_first_of("/\\?"); std::string_view file_host_buffer( @@ -827,8 +815,7 @@ result_type parse_url_impl(std::string_view user_input, } case ada::state::FILE: { ada_log("FILE ", helpers::substring(url_data, input_position)); - std::string_view file_view = - helpers::substring(url_data, input_position); + std::string_view file_view = url_data.substr(input_position); url.set_protocol_as_file(); if constexpr (result_type_is_ada_url) { @@ -856,9 +843,7 @@ result_type parse_url_impl(std::string_view user_input, url.path = base_url->path; url.query = base_url->query; } else { - // TODO: Get rid of set_hostname and replace it with - // update_base_hostname - url.set_hostname(base_url->get_hostname()); + url.update_host_to_base_host(base_url->get_hostname()); url.update_base_pathname(base_url->get_pathname()); url.update_base_search(base_url->get_search()); } @@ -881,7 +866,7 @@ result_type parse_url_impl(std::string_view user_input, } else { std::string_view path = url.get_pathname(); if (helpers::shorten_path(path, url.type)) { - url.update_base_pathname(std::string(path)); + url.update_base_pathname(std::move(std::string(path))); } } } diff --git a/src/unicode.cpp b/src/unicode.cpp index c979c0250..78b4e57f4 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -284,7 +284,7 @@ ada_really_inline constexpr bool is_ascii_tab_or_newline( constexpr 
std::string_view table_is_double_dot_path_segment[] = { "..", "%2e.", ".%2e", "%2e%2e"}; -ada_really_inline ada_constexpr bool is_double_dot_path_segment( +ada_really_inline constexpr bool is_double_dot_path_segment( std::string_view input) noexcept { // This will catch most cases: // The length must be 2,4 or 6. @@ -372,7 +372,6 @@ std::string percent_decode(const std::string_view input, size_t first_percent) { !is_ascii_hex_digit(pointer[2])))) { dest += ch; pointer++; - continue; } else { unsigned a = convert_hex_to_binary(pointer[1]); unsigned b = convert_hex_to_binary(pointer[2]); @@ -386,10 +385,9 @@ std::string percent_decode(const std::string_view input, size_t first_percent) { std::string percent_encode(const std::string_view input, const uint8_t character_set[]) { - auto pointer = - std::find_if(input.begin(), input.end(), [character_set](const char c) { - return character_sets::bit_at(character_set, c); - }); + auto pointer = std::ranges::find_if(input, [character_set](const char c) { + return character_sets::bit_at(character_set, c); + }); // Optimization: Don't iterate if percent encode is not required if (pointer == input.end()) { return std::string(input); diff --git a/src/url-getters.cpp b/src/url-getters.cpp deleted file mode 100644 index 54a7b2a34..000000000 --- a/src/url-getters.cpp +++ /dev/null @@ -1,91 +0,0 @@ -/** - * @file url-getters.cpp - * Includes all the getters of `ada::url` - */ -#include "ada.h" -#include "ada/implementation.h" -#include "ada/helpers.h" -#include "ada/scheme.h" - -#include - -namespace ada { -[[nodiscard]] std::string url::get_origin() const noexcept { - if (is_special()) { - // Return a new opaque origin. 
- if (type == scheme::FILE) { - return "null"; - } - return ada::helpers::concat(get_protocol(), "//", get_host()); - } - - if (non_special_scheme == "blob") { - if (!path.empty()) { - auto result = ada::parse(path); - if (result && - (result->type == scheme::HTTP || result->type == scheme::HTTPS)) { - // If pathURL's scheme is not "http" and not "https", then return a - // new opaque origin. - return ada::helpers::concat(result->get_protocol(), "//", - result->get_host()); - } - } - } - - // Return a new opaque origin. - return "null"; -} - -[[nodiscard]] std::string url::get_protocol() const noexcept { - if (is_special()) { - return helpers::concat(ada::scheme::details::is_special_list[type], ":"); - } - // We only move the 'scheme' if it is non-special. - return helpers::concat(non_special_scheme, ":"); -} - -[[nodiscard]] std::string url::get_host() const noexcept { - // If url's host is null, then return the empty string. - // If url's port is null, return url's host, serialized. - // Return url's host, serialized, followed by U+003A (:) and url's port, - // serialized. - if (!host.has_value()) { - return ""; - } - if (port.has_value()) { - return host.value() + ":" + get_port(); - } - return host.value(); -} - -[[nodiscard]] std::string url::get_hostname() const noexcept { - return host.value_or(""); -} - -[[nodiscard]] std::string url::get_search() const noexcept { - // If this's URL's query is either null or the empty string, then return the - // empty string. Return U+003F (?), followed by this's URL's query. - return (!query.has_value() || (query.value().empty())) ? "" - : "?" + query.value(); -} - -[[nodiscard]] const std::string& url::get_username() const noexcept { - return username; -} - -[[nodiscard]] const std::string& url::get_password() const noexcept { - return password; -} - -[[nodiscard]] std::string url::get_port() const noexcept { - return port.has_value() ? 
std::to_string(port.value()) : ""; -} - -[[nodiscard]] std::string url::get_hash() const noexcept { - // If this's URL's fragment is either null or the empty string, then return - // the empty string. Return U+0023 (#), followed by this's URL's fragment. - return (!hash.has_value() || (hash.value().empty())) ? "" - : "#" + hash.value(); -} - -} // namespace ada diff --git a/src/url-setters.cpp b/src/url-setters.cpp deleted file mode 100644 index a368ef3a3..000000000 --- a/src/url-setters.cpp +++ /dev/null @@ -1,236 +0,0 @@ -/** - * @file url-setters.cpp - * Includes all the setters of `ada::url` - */ -#include "ada.h" -#include "ada/helpers.h" - -#include -#include - -namespace ada { - -template -bool url::set_host_or_hostname(const std::string_view input) { - if (has_opaque_path) { - return false; - } - - std::optional previous_host = host; - std::optional previous_port = port; - - size_t host_end_pos = input.find('#'); - std::string _host(input.data(), host_end_pos != std::string_view::npos - ? host_end_pos - : input.size()); - helpers::remove_ascii_tab_or_newline(_host); - std::string_view new_host(_host); - - // If url's scheme is "file", then set state to file host state, instead of - // host state. - if (type != ada::scheme::type::FILE) { - std::string_view host_view(_host.data(), _host.length()); - auto [location, found_colon] = - helpers::get_host_delimiter_location(is_special(), host_view); - - // Otherwise, if c is U+003A (:) and insideBrackets is false, then: - // Note: the 'found_colon' value is true if and only if a colon was - // encountered while not inside brackets. - if (found_colon) { - if constexpr (override_hostname) { - return false; - } - std::string_view buffer = new_host.substr(location + 1); - if (!buffer.empty()) { - set_port(buffer); - } - } - // If url is special and host_view is the empty string, validation error, - // return failure. 
Otherwise, if state override is given, host_view is the - // empty string, and either url includes credentials or url's port is - // non-null, return. - else if (host_view.empty() && - (is_special() || has_credentials() || port.has_value())) { - return false; - } - - // Let host be the result of host parsing host_view with url is not special. - if (host_view.empty() && !is_special()) { - host = ""; - return true; - } - - bool succeeded = parse_host(host_view); - if (!succeeded) { - host = previous_host; - update_base_port(previous_port); - } - return succeeded; - } - - size_t location = new_host.find_first_of("/\\?"); - if (location != std::string_view::npos) { - new_host.remove_suffix(new_host.length() - location); - } - - if (new_host.empty()) { - // Set url's host to the empty string. - host = ""; - } else { - // Let host be the result of host parsing buffer with url is not special. - if (!parse_host(new_host)) { - host = previous_host; - update_base_port(previous_port); - return false; - } - - // If host is "localhost", then set host to the empty string. 
- if (host.has_value() && host.value() == "localhost") { - host = ""; - } - } - return true; -} - -bool url::set_host(const std::string_view input) { - return set_host_or_hostname(input); -} - -bool url::set_hostname(const std::string_view input) { - return set_host_or_hostname(input); -} - -bool url::set_username(const std::string_view input) { - if (cannot_have_credentials_or_port()) { - return false; - } - username = ada::unicode::percent_encode( - input, character_sets::USERINFO_PERCENT_ENCODE); - return true; -} - -bool url::set_password(const std::string_view input) { - if (cannot_have_credentials_or_port()) { - return false; - } - password = ada::unicode::percent_encode( - input, character_sets::USERINFO_PERCENT_ENCODE); - return true; -} - -bool url::set_port(const std::string_view input) { - if (cannot_have_credentials_or_port()) { - return false; - } - std::string trimmed(input); - helpers::remove_ascii_tab_or_newline(trimmed); - if (trimmed.empty()) { - port = std::nullopt; - return true; - } - // Input should not start with control characters. - if (ada::unicode::is_c0_control_or_space(trimmed.front())) { - return false; - } - // Input should contain at least one ascii digit. - if (input.find_first_of("0123456789") == std::string_view::npos) { - return false; - } - - // Revert changes if parse_port fails. - std::optional previous_port = port; - parse_port(trimmed); - if (is_valid) { - return true; - } - port = previous_port; - is_valid = true; - return false; -} - -void url::set_hash(const std::string_view input) { - if (input.empty()) { - hash = std::nullopt; - helpers::strip_trailing_spaces_from_opaque_path(*this); - return; - } - - std::string new_value; - new_value = input[0] == '#' ? 
input.substr(1) : input; - helpers::remove_ascii_tab_or_newline(new_value); - hash = unicode::percent_encode(new_value, - ada::character_sets::FRAGMENT_PERCENT_ENCODE); -} - -void url::set_search(const std::string_view input) { - if (input.empty()) { - query = std::nullopt; - helpers::strip_trailing_spaces_from_opaque_path(*this); - return; - } - - std::string new_value; - new_value = input[0] == '?' ? input.substr(1) : input; - helpers::remove_ascii_tab_or_newline(new_value); - - auto query_percent_encode_set = - is_special() ? ada::character_sets::SPECIAL_QUERY_PERCENT_ENCODE - : ada::character_sets::QUERY_PERCENT_ENCODE; - - query = ada::unicode::percent_encode(std::string_view(new_value), - query_percent_encode_set); -} - -bool url::set_pathname(const std::string_view input) { - if (has_opaque_path) { - return false; - } - path = ""; - parse_path(input); - return true; -} - -bool url::set_protocol(const std::string_view input) { - std::string view(input); - helpers::remove_ascii_tab_or_newline(view); - if (view.empty()) { - return true; - } - - // Schemes should start with alpha values. 
- if (!checkers::is_alpha(view[0])) { - return false; - } - - view.append(":"); - - std::string::iterator pointer = - std::find_if_not(view.begin(), view.end(), unicode::is_alnum_plus); - - if (pointer != view.end() && *pointer == ':') { - return parse_scheme( - std::string_view(view.data(), pointer - view.begin())); - } - return false; -} - -bool url::set_href(const std::string_view input) { - ada::result out = ada::parse(input); - - if (out) { - username = out->username; - password = out->password; - host = out->host; - port = out->port; - path = out->path; - query = out->query; - hash = out->hash; - type = out->type; - non_special_scheme = out->non_special_scheme; - has_opaque_path = out->has_opaque_path; - } - - return out.has_value(); -} - -} // namespace ada diff --git a/src/url.cpp b/src/url.cpp index 657364381..e14b37b75 100644 --- a/src/url.cpp +++ b/src/url.cpp @@ -5,13 +5,14 @@ #include #include #include +#include namespace ada { bool url::parse_opaque_host(std::string_view input) { ada_log("parse_opaque_host ", input, " [", input.size(), " bytes]"); - if (std::any_of(input.begin(), input.end(), - ada::unicode::is_forbidden_host_code_point)) { + if (std::ranges::any_of(input.begin(), input.end(), + ada::unicode::is_forbidden_host_code_point)) { return is_valid = false; } @@ -513,18 +514,14 @@ ada_really_inline void url::parse_path(std::string_view input) { path = "/"; } else if ((internal_input[0] == '/') || (internal_input[0] == '\\')) { helpers::parse_prepared_path(internal_input.substr(1), type, path); - return; } else { helpers::parse_prepared_path(internal_input, type, path); - return; } } else if (!internal_input.empty()) { if (internal_input[0] == '/') { helpers::parse_prepared_path(internal_input.substr(1), type, path); - return; } else { helpers::parse_prepared_path(internal_input, type, path); - return; } } else { if (!host.has_value()) { @@ -589,4 +586,295 @@ ada_really_inline void url::parse_path(std::string_view input) { return 
checkers::verify_dns_length(host.value()); } +[[nodiscard]] std::string url::get_origin() const noexcept { + if (is_special()) { + // Return a new opaque origin. + if (type == scheme::FILE) { + return "null"; + } + return ada::helpers::concat(get_protocol(), "//", get_host()); + } + + if (non_special_scheme == "blob") { + if (!path.empty()) { + auto result = ada::parse(path); + if (result && + (result->type == scheme::HTTP || result->type == scheme::HTTPS)) { + // If pathURL's scheme is not "http" and not "https", then return a + // new opaque origin. + return ada::helpers::concat(result->get_protocol(), "//", + result->get_host()); + } + } + } + + // Return a new opaque origin. + return "null"; +} + +[[nodiscard]] std::string url::get_protocol() const noexcept { + if (is_special()) { + return helpers::concat(ada::scheme::details::is_special_list[type], ":"); + } + // We only move the 'scheme' if it is non-special. + return helpers::concat(non_special_scheme, ":"); +} + +[[nodiscard]] std::string url::get_host() const noexcept { + // If url's host is null, then return the empty string. + // If url's port is null, return url's host, serialized. + // Return url's host, serialized, followed by U+003A (:) and url's port, + // serialized. + if (!host.has_value()) { + return ""; + } + if (port.has_value()) { + return host.value() + ":" + get_port(); + } + return host.value(); +} + +[[nodiscard]] std::string url::get_hostname() const noexcept { + return host.value_or(""); +} + +[[nodiscard]] std::string url::get_search() const noexcept { + // If this's URL's query is either null or the empty string, then return the + // empty string. Return U+003F (?), followed by this's URL's query. + return (!query.has_value() || (query.value().empty())) ? "" + : "?" 
+ query.value(); +} + +[[nodiscard]] const std::string& url::get_username() const noexcept { + return username; +} + +[[nodiscard]] const std::string& url::get_password() const noexcept { + return password; +} + +[[nodiscard]] std::string url::get_port() const noexcept { + return port.has_value() ? std::to_string(port.value()) : ""; +} + +[[nodiscard]] std::string url::get_hash() const noexcept { + // If this's URL's fragment is either null or the empty string, then return + // the empty string. Return U+0023 (#), followed by this's URL's fragment. + return (!hash.has_value() || (hash.value().empty())) ? "" + : "#" + hash.value(); +} + +template +bool url::set_host_or_hostname(const std::string_view input) { + if (has_opaque_path) { + return false; + } + + std::optional previous_host = host; + std::optional previous_port = port; + + size_t host_end_pos = input.find('#'); + std::string _host(input.data(), host_end_pos != std::string_view::npos + ? host_end_pos + : input.size()); + helpers::remove_ascii_tab_or_newline(_host); + std::string_view new_host(_host); + + // If url's scheme is "file", then set state to file host state, instead of + // host state. + if (type != ada::scheme::type::FILE) { + std::string_view host_view(_host.data(), _host.length()); + auto [location, found_colon] = + helpers::get_host_delimiter_location(is_special(), host_view); + + // Otherwise, if c is U+003A (:) and insideBrackets is false, then: + // Note: the 'found_colon' value is true if and only if a colon was + // encountered while not inside brackets. + if (found_colon) { + if constexpr (override_hostname) { + return false; + } + std::string_view buffer = new_host.substr(location + 1); + if (!buffer.empty()) { + set_port(buffer); + } + } + // If url is special and host_view is the empty string, validation error, + // return failure. Otherwise, if state override is given, host_view is the + // empty string, and either url includes credentials or url's port is + // non-null, return. 
+ else if (host_view.empty() && + (is_special() || has_credentials() || port.has_value())) { + return false; + } + + // Let host be the result of host parsing host_view with url is not special. + if (host_view.empty() && !is_special()) { + host = ""; + return true; + } + + bool succeeded = parse_host(host_view); + if (!succeeded) { + host = previous_host; + update_base_port(previous_port); + } + return succeeded; + } + + size_t location = new_host.find_first_of("/\\?"); + if (location != std::string_view::npos) { + new_host.remove_suffix(new_host.length() - location); + } + + if (new_host.empty()) { + // Set url's host to the empty string. + host = ""; + } else { + // Let host be the result of host parsing buffer with url is not special. + if (!parse_host(new_host)) { + host = previous_host; + update_base_port(previous_port); + return false; + } + + // If host is "localhost", then set host to the empty string. + if (host.has_value() && host.value() == "localhost") { + host = ""; + } + } + return true; +} + +bool url::set_host(const std::string_view input) { + return set_host_or_hostname(input); +} + +bool url::set_hostname(const std::string_view input) { + return set_host_or_hostname(input); +} + +bool url::set_username(const std::string_view input) { + if (cannot_have_credentials_or_port()) { + return false; + } + username = ada::unicode::percent_encode( + input, character_sets::USERINFO_PERCENT_ENCODE); + return true; +} + +bool url::set_password(const std::string_view input) { + if (cannot_have_credentials_or_port()) { + return false; + } + password = ada::unicode::percent_encode( + input, character_sets::USERINFO_PERCENT_ENCODE); + return true; +} + +bool url::set_port(const std::string_view input) { + if (cannot_have_credentials_or_port()) { + return false; + } + std::string trimmed(input); + helpers::remove_ascii_tab_or_newline(trimmed); + if (trimmed.empty()) { + port = std::nullopt; + return true; + } + // Input should not start with control characters. 
+ if (ada::unicode::is_c0_control_or_space(trimmed.front())) { + return false; + } + // Input should contain at least one ascii digit. + if (input.find_first_of("0123456789") == std::string_view::npos) { + return false; + } + + // Revert changes if parse_port fails. + std::optional previous_port = port; + parse_port(trimmed); + if (is_valid) { + return true; + } + port = previous_port; + is_valid = true; + return false; +} + +void url::set_hash(const std::string_view input) { + if (input.empty()) { + hash = std::nullopt; + helpers::strip_trailing_spaces_from_opaque_path(*this); + return; + } + + std::string new_value; + new_value = input[0] == '#' ? input.substr(1) : input; + helpers::remove_ascii_tab_or_newline(new_value); + hash = unicode::percent_encode(new_value, + ada::character_sets::FRAGMENT_PERCENT_ENCODE); +} + +void url::set_search(const std::string_view input) { + if (input.empty()) { + query = std::nullopt; + helpers::strip_trailing_spaces_from_opaque_path(*this); + return; + } + + std::string new_value; + new_value = input[0] == '?' ? input.substr(1) : input; + helpers::remove_ascii_tab_or_newline(new_value); + + auto query_percent_encode_set = + is_special() ? ada::character_sets::SPECIAL_QUERY_PERCENT_ENCODE + : ada::character_sets::QUERY_PERCENT_ENCODE; + + query = ada::unicode::percent_encode(new_value, query_percent_encode_set); +} + +bool url::set_pathname(const std::string_view input) { + if (has_opaque_path) { + return false; + } + path = ""; + parse_path(input); + return true; +} + +bool url::set_protocol(const std::string_view input) { + std::string view(input); + helpers::remove_ascii_tab_or_newline(view); + if (view.empty()) { + return true; + } + + // Schemes should start with alpha values. 
+ if (!checkers::is_alpha(view[0])) { + return false; + } + + view.append(":"); + + std::string::iterator pointer = + std::ranges::find_if_not(view, unicode::is_alnum_plus); + + if (pointer != view.end() && *pointer == ':') { + return parse_scheme( + std::string_view(view.data(), pointer - view.begin())); + } + return false; +} + +bool url::set_href(const std::string_view input) { + ada::result out = ada::parse(input); + + if (out) { + *this = *out; + } + + return out.has_value(); +} + } // namespace ada diff --git a/src/url_aggregator.cpp b/src/url_aggregator.cpp index 48ed79b8e..2c431cef1 100644 --- a/src/url_aggregator.cpp +++ b/src/url_aggregator.cpp @@ -22,7 +22,7 @@ template std::string_view input{input_with_colon}; input.remove_suffix(1); auto parsed_type = ada::scheme::get_scheme_type(input); - bool is_input_special = (parsed_type != ada::scheme::NOT_SPECIAL); + const bool is_input_special = (parsed_type != ada::scheme::NOT_SPECIAL); /** * In the common case, we will immediately recognize a special scheme (e.g., *http, https), in which case, we can go really fast. 
@@ -224,7 +224,7 @@ bool url_aggregator::set_protocol(const std::string_view input) { view.append(":"); std::string::iterator pointer = - std::find_if_not(view.begin(), view.end(), unicode::is_alnum_plus); + std::ranges::find_if_not(view, unicode::is_alnum_plus); if (pointer != view.end() && *pointer == ':') { return parse_scheme_with_colon( diff --git a/tests/wpt/urltestdata.json b/tests/wpt/urltestdata.json index 9dbe5456a..0ebaf4cd4 100644 --- a/tests/wpt/urltestdata.json +++ b/tests/wpt/urltestdata.json @@ -9992,5 +9992,101 @@ "pathname": "/", "search": "", "hash": "" + }, + "# Non-special URL and backslashes", + { + "input": "non-special:\\\\opaque", + "base": null, + "href": "non-special:\\\\opaque", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "\\\\opaque", + "search": "", + "hash": "" + }, + { + "input": "non-special:\\\\opaque/path", + "base": null, + "href": "non-special:\\\\opaque/path", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "\\\\opaque/path", + "search": "", + "hash": "" + }, + { + "input": "non-special:\\\\opaque\\path", + "base": null, + "href": "non-special:\\\\opaque\\path", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "\\\\opaque\\path", + "search": "", + "hash": "" + }, + { + "input": "non-special:\\/opaque", + "base": null, + "href": "non-special:\\/opaque", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "\\/opaque", + "search": "", + "hash": "" + }, + { + "input": "non-special:/\\path", + "base": null, + "href": "non-special:/\\path", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + 
"hostname": "", + "port": "", + "pathname": "/\\path", + "search": "", + "hash": "" + }, + { + "input": "non-special://host\\a", + "base": null, + "failure": true + }, + { + "input": "non-special://host/a\\b", + "base": null, + "href": "non-special://host/a\\b", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/a\\b", + "search": "", + "hash": "" } ] diff --git a/tools/release/requirements.txt b/tools/release/requirements.txt index 4feab79a3..2a7ace486 100644 --- a/tools/release/requirements.txt +++ b/tools/release/requirements.txt @@ -1,2 +1,2 @@ -PyGithub==2.4.0 -pytest==8.3.2 +PyGithub==2.5.0 +pytest==8.3.3 diff --git a/tools/run-clangcldocker.sh b/tools/run-clangcldocker.sh index c6881acde..5f6c0713d 100755 --- a/tools/run-clangcldocker.sh +++ b/tools/run-clangcldocker.sh @@ -3,7 +3,7 @@ set -e COMMAND=$* SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" MAINSOURCE=$SCRIPTPATH/.. -ALL_ADA_FILES=$(cd $MAINSOURCE && git ls-tree --full-tree --name-only -r HEAD | grep -e ".*\.\(c\|h\|cc\|cpp\|hh\)\$" | grep -vFf clang-format-ignore.txt) +ALL_ADA_FILES=$(cd $MAINSOURCE && git ls-tree --full-tree --name-only -r HEAD | grep -e ".*\.\(c\|h\|cc\|cpp\|hh\)\$") if clang-format-17 --version 2>/dev/null | grep -qF 'version 17.'; then cd $MAINSOURCE; clang-format-17 --style=file --verbose -i "$@" $ALL_ADA_FILES